Files
toon/benchmarks/results/accuracy/raw-results.json
2025-10-27 16:02:51 +01:00

26238 lines
575 KiB
JSON

[
{
"questionId": "q1",
"format": "json",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 136,
"latencyMs": 1973.9505419999998
},
{
"questionId": "q1",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1337.454
},
{
"questionId": "q1",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 5,
"latencyMs": 2219.8078330000003
},
{
"questionId": "q1",
"format": "toon",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 72,
"latencyMs": 2159.820958
},
{
"questionId": "q1",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1456.8202079999999
},
{
"questionId": "q1",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 5,
"latencyMs": 2502.1313750000004
},
{
"questionId": "q1",
"format": "csv",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 72,
"latencyMs": 2189.1171249999998
},
{
"questionId": "q1",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1251.8321250000001
},
{
"questionId": "q1",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 5,
"latencyMs": 2795.7488749999998
},
{
"questionId": "q1",
"format": "xml",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 136,
"latencyMs": 13798.979167
},
{
"questionId": "q1",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 9360,
"outputTokens": 6,
"latencyMs": 1484.293458
},
{
"questionId": "q1",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 5,
"latencyMs": 2323.462083
},
{
"questionId": "q1",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 8,
"latencyMs": 2319.068875
},
{
"questionId": "q1",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1252.173292
},
{
"questionId": "q1",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "56176",
"actual": "56176",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 5,
"latencyMs": 1856.926
},
{
"questionId": "q2",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 71,
"latencyMs": 2500.574542
},
{
"questionId": "q2",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1249.101917
},
{
"questionId": "q2",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 1,
"latencyMs": 1744.0090420000001
},
{
"questionId": "q2",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 71,
"latencyMs": 2319.50975
},
{
"questionId": "q2",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 1258.086833
},
{
"questionId": "q2",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 1,
"latencyMs": 1847.8221249999997
},
{
"questionId": "q2",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 71,
"latencyMs": 4817.745874999999
},
{
"questionId": "q2",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 1024.5234999999998
},
{
"questionId": "q2",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 1,
"latencyMs": 1336.0151660000001
},
{
"questionId": "q2",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 135,
"latencyMs": 4109.140791
},
{
"questionId": "q2",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9359,
"outputTokens": 4,
"latencyMs": 1267.7541249999995
},
{
"questionId": "q2",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 1,
"latencyMs": 1808.7597920000007
},
{
"questionId": "q2",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 71,
"latencyMs": 4865.839082999999
},
{
"questionId": "q2",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1018.2179999999998
},
{
"questionId": "q2",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 1,
"latencyMs": 2534.4780839999994
},
{
"questionId": "q3",
"format": "json",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 6392,
"outputTokens": 204,
"latencyMs": 3778.0985
},
{
"questionId": "q3",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 7874,
"outputTokens": 12,
"latencyMs": 1190.655541
},
{
"questionId": "q3",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 7911,
"outputTokens": 10,
"latencyMs": 1595.469916
},
{
"questionId": "q3",
"format": "toon",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 2529,
"outputTokens": 76,
"latencyMs": 4163.945208000001
},
{
"questionId": "q3",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 2986,
"outputTokens": 12,
"latencyMs": 892.92875
},
{
"questionId": "q3",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 3320,
"outputTokens": 10,
"latencyMs": 1780.4322919999995
},
{
"questionId": "q3",
"format": "csv",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 2383,
"outputTokens": 76,
"latencyMs": 3440.4715000000006
},
{
"questionId": "q3",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 2860,
"outputTokens": 12,
"latencyMs": 1312.3002079999997
},
{
"questionId": "q3",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 3194,
"outputTokens": 10,
"latencyMs": 1560.3538330000001
},
{
"questionId": "q3",
"format": "xml",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 7359,
"outputTokens": 76,
"latencyMs": 3440.5599999999995
},
{
"questionId": "q3",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 9364,
"outputTokens": 12,
"latencyMs": 1354.2122089999993
},
{
"questionId": "q3",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 9100,
"outputTokens": 10,
"latencyMs": 1389.2405829999998
},
{
"questionId": "q3",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 5014,
"outputTokens": 76,
"latencyMs": 2048.7699159999993
},
{
"questionId": "q3",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 5764,
"outputTokens": 12,
"latencyMs": 1123.4172500000004
},
{
"questionId": "q3",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"isCorrect": true,
"inputTokens": 5746,
"outputTokens": 10,
"latencyMs": 1638.1436670000003
},
{
"questionId": "q4",
"format": "json",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 72,
"latencyMs": 2966.8363329999993
},
{
"questionId": "q4",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1323.5372910000006
},
{
"questionId": "q4",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 6,
"latencyMs": 1860.8958750000002
},
{
"questionId": "q4",
"format": "toon",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 136,
"latencyMs": 6895.250208000001
},
{
"questionId": "q4",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1020.296542
},
{
"questionId": "q4",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 6,
"latencyMs": 2481.260875
},
{
"questionId": "q4",
"format": "csv",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 200,
"latencyMs": 2689.2119999999995
},
{
"questionId": "q4",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1194.3670409999995
},
{
"questionId": "q4",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 6,
"latencyMs": 1743.3429579999993
},
{
"questionId": "q4",
"format": "xml",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 72,
"latencyMs": 5788.955082999999
},
{
"questionId": "q4",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 9360,
"outputTokens": 6,
"latencyMs": 1222.5617920000004
},
{
"questionId": "q4",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 6,
"latencyMs": 1692.9171670000014
},
{
"questionId": "q4",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 72,
"latencyMs": 6426.231709
},
{
"questionId": "q4",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1159.4893339999999
},
{
"questionId": "q4",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "117381",
"actual": "117381",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 6,
"latencyMs": 2415.9878329999992
},
{
"questionId": "q5",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 71,
"latencyMs": 2950.774625
},
{
"questionId": "q5",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1003.6548750000002
},
{
"questionId": "q5",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7907,
"outputTokens": 1,
"latencyMs": 1209.7468329999992
},
{
"questionId": "q5",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 71,
"latencyMs": 3026.993291999999
},
{
"questionId": "q5",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 981.8320000000003
},
{
"questionId": "q5",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3316,
"outputTokens": 1,
"latencyMs": 2011.3852089999982
},
{
"questionId": "q5",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 135,
"latencyMs": 4215.294709
},
{
"questionId": "q5",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 906.2993340000012
},
{
"questionId": "q5",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3190,
"outputTokens": 1,
"latencyMs": 1666.1483749999989
},
{
"questionId": "q5",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 135,
"latencyMs": 4311.166333000001
},
{
"questionId": "q5",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9358,
"outputTokens": 4,
"latencyMs": 1072.923917
},
{
"questionId": "q5",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9096,
"outputTokens": 1,
"latencyMs": 2526.938041999998
},
{
"questionId": "q5",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 135,
"latencyMs": 3970.2666659999995
},
{
"questionId": "q5",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1364.8737079999992
},
{
"questionId": "q5",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5742,
"outputTokens": 1,
"latencyMs": 3125.6591660000013
},
{
"questionId": "q6",
"format": "json",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 139,
"latencyMs": 3116.8453340000015
},
{
"questionId": "q6",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 11,
"latencyMs": 1065.8984999999993
},
{
"questionId": "q6",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 8,
"latencyMs": 2190.0096250000024
},
{
"questionId": "q6",
"format": "toon",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 75,
"latencyMs": 2661.1630829999995
},
{
"questionId": "q6",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 11,
"latencyMs": 990.5193749999999
},
{
"questionId": "q6",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 8,
"latencyMs": 1937.4020420000015
},
{
"questionId": "q6",
"format": "csv",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 139,
"latencyMs": 3740.6538750000036
},
{
"questionId": "q6",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 11,
"latencyMs": 1033.1626250000008
},
{
"questionId": "q6",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 8,
"latencyMs": 1733.0828340000007
},
{
"questionId": "q6",
"format": "xml",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 139,
"latencyMs": 3042.367707999998
},
{
"questionId": "q6",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 11,
"latencyMs": 1472.3534580000014
},
{
"questionId": "q6",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 8,
"latencyMs": 1953.7035419999993
},
{
"questionId": "q6",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 75,
"latencyMs": 2179.8505829999995
},
{
"questionId": "q6",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 11,
"latencyMs": 1714.971625000002
},
{
"questionId": "q6",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 8,
"latencyMs": 2170.373334
},
{
"questionId": "q7",
"format": "json",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 72,
"latencyMs": 3005.6769590000004
},
{
"questionId": "q7",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 2070.191666999999
},
{
"questionId": "q7",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 7907,
"outputTokens": 5,
"latencyMs": 1338.8482500000027
},
{
"questionId": "q7",
"format": "toon",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 136,
"latencyMs": 2615.7999579999996
},
{
"questionId": "q7",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1124.058917000002
},
{
"questionId": "q7",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 3316,
"outputTokens": 5,
"latencyMs": 2317.5837079999983
},
{
"questionId": "q7",
"format": "csv",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 72,
"latencyMs": 9505.310291999998
},
{
"questionId": "q7",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 895.9319159999977
},
{
"questionId": "q7",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 3190,
"outputTokens": 5,
"latencyMs": 1462.6939160000002
},
{
"questionId": "q7",
"format": "xml",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 136,
"latencyMs": 2529.6767499999987
},
{
"questionId": "q7",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 9360,
"outputTokens": 6,
"latencyMs": 1144.4980419999993
},
{
"questionId": "q7",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 9096,
"outputTokens": 5,
"latencyMs": 3182.1694160000006
},
{
"questionId": "q7",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 72,
"latencyMs": 2789.477584
},
{
"questionId": "q7",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1023.4829170000012
},
{
"questionId": "q7",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "92971",
"actual": "92971",
"isCorrect": true,
"inputTokens": 5742,
"outputTokens": 5,
"latencyMs": 3741.309666000001
},
{
"questionId": "q8",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 199,
"latencyMs": 2646.0443330000016
},
{
"questionId": "q8",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 4,
"latencyMs": 1147.7947499999973
},
{
"questionId": "q8",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 1,
"latencyMs": 2658.0985
},
{
"questionId": "q8",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 71,
"latencyMs": 3748.428749999999
},
{
"questionId": "q8",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 4,
"latencyMs": 876.6897919999974
},
{
"questionId": "q8",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 1,
"latencyMs": 3812.920249999999
},
{
"questionId": "q8",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 71,
"latencyMs": 6820.9698750000025
},
{
"questionId": "q8",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 4,
"latencyMs": 997.5997500000012
},
{
"questionId": "q8",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 1,
"latencyMs": 1829.7533750000002
},
{
"questionId": "q8",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 135,
"latencyMs": 6256.235125000003
},
{
"questionId": "q8",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 4,
"latencyMs": 1280.0348330000015
},
{
"questionId": "q8",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 1,
"latencyMs": 3024.0259170000027
},
{
"questionId": "q8",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 71,
"latencyMs": 3522.8339579999993
},
{
"questionId": "q8",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 4,
"latencyMs": 1134.9532080000026
},
{
"questionId": "q8",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 1,
"latencyMs": 3095.1540000000023
},
{
"questionId": "q9",
"format": "json",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 6392,
"outputTokens": 140,
"latencyMs": 2087.950582999998
},
{
"questionId": "q9",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 11,
"latencyMs": 1115.425166000001
},
{
"questionId": "q9",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 7910,
"outputTokens": 9,
"latencyMs": 1841.3965420000022
},
{
"questionId": "q9",
"format": "toon",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 2529,
"outputTokens": 204,
"latencyMs": 4039.2035830000023
},
{
"questionId": "q9",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 11,
"latencyMs": 1254.9832079999978
},
{
"questionId": "q9",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 3319,
"outputTokens": 9,
"latencyMs": 2190.8811249999962
},
{
"questionId": "q9",
"format": "csv",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 2383,
"outputTokens": 140,
"latencyMs": 3403.9012079999957
},
{
"questionId": "q9",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 11,
"latencyMs": 1323.0636660000018
},
{
"questionId": "q9",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 3193,
"outputTokens": 9,
"latencyMs": 1047.0718749999942
},
{
"questionId": "q9",
"format": "xml",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 7359,
"outputTokens": 140,
"latencyMs": 3498.7119999999995
},
{
"questionId": "q9",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 11,
"latencyMs": 1830.5542919999934
},
{
"questionId": "q9",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 9099,
"outputTokens": 9,
"latencyMs": 2052.039208999995
},
{
"questionId": "q9",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 5014,
"outputTokens": 140,
"latencyMs": 2254.0641659999965
},
{
"questionId": "q9",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 11,
"latencyMs": 1279.8175830000037
},
{
"questionId": "q9",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"isCorrect": true,
"inputTokens": 5745,
"outputTokens": 9,
"latencyMs": 2624.0571249999994
},
{
"questionId": "q10",
"format": "json",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 6391,
"outputTokens": 72,
"latencyMs": 3316.716124999999
},
{
"questionId": "q10",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1078.8857919999937
},
{
"questionId": "q10",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 6,
"latencyMs": 1426.163416000003
},
{
"questionId": "q10",
"format": "toon",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 2528,
"outputTokens": 136,
"latencyMs": 3091.0714579999985
},
{
"questionId": "q10",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1171.1557079999984
},
{
"questionId": "q10",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 6,
"latencyMs": 2722.0316250000033
},
{
"questionId": "q10",
"format": "csv",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 2382,
"outputTokens": 72,
"latencyMs": 3280.0853329999954
},
{
"questionId": "q10",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 937.3515409999964
},
{
"questionId": "q10",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 6,
"latencyMs": 1638.423999999999
},
{
"questionId": "q10",
"format": "xml",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 7358,
"outputTokens": 136,
"latencyMs": 15425.220833
},
{
"questionId": "q10",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 9360,
"outputTokens": 6,
"latencyMs": 1195.8543749999953
},
{
"questionId": "q10",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 6,
"latencyMs": 2432.2206250000017
},
{
"questionId": "q10",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 5013,
"outputTokens": 72,
"latencyMs": 2047.1201250000013
},
{
"questionId": "q10",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1617.048625000003
},
{
"questionId": "q10",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "107744",
"actual": "107744",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 6,
"latencyMs": 1548.9360000000015
},
{
"questionId": "q11",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 71,
"latencyMs": 3741.5673339999994
},
{
"questionId": "q11",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1189.5477079999982
},
{
"questionId": "q11",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 1,
"latencyMs": 1194.6662920000017
},
{
"questionId": "q11",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 135,
"latencyMs": 2947.4346250000017
},
{
"questionId": "q11",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 944.1087090000001
},
{
"questionId": "q11",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 1,
"latencyMs": 2017.044041999994
},
{
"questionId": "q11",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 71,
"latencyMs": 4068.897624999998
},
{
"questionId": "q11",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 1092.8982499999984
},
{
"questionId": "q11",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 1,
"latencyMs": 2148.519874999998
},
{
"questionId": "q11",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 135,
"latencyMs": 3025.696167000002
},
{
"questionId": "q11",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9359,
"outputTokens": 4,
"latencyMs": 1069.479542000001
},
{
"questionId": "q11",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 1,
"latencyMs": 2595.035582999997
},
{
"questionId": "q11",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 71,
"latencyMs": 2200.230208000001
},
{
"questionId": "q11",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1226.070749999999
},
{
"questionId": "q11",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 1,
"latencyMs": 2045.9056249999994
},
{
"questionId": "q12",
"format": "json",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 266,
"latencyMs": 5672.897708000004
},
{
"questionId": "q12",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 7867,
"outputTokens": 9,
"latencyMs": 1745.323000000004
},
{
"questionId": "q12",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 8,
"latencyMs": 1877.5404999999955
},
{
"questionId": "q12",
"format": "toon",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 74,
"latencyMs": 5317.909041999999
},
{
"questionId": "q12",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 2979,
"outputTokens": 9,
"latencyMs": 916.7109169999967
},
{
"questionId": "q12",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 8,
"latencyMs": 2401.305290999997
},
{
"questionId": "q12",
"format": "csv",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 74,
"latencyMs": 3016.4596669999955
},
{
"questionId": "q12",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 2853,
"outputTokens": 9,
"latencyMs": 1233.9625830000004
},
{
"questionId": "q12",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 8,
"latencyMs": 2000.6465000000026
},
{
"questionId": "q12",
"format": "xml",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 138,
"latencyMs": 6270.167416999997
},
{
"questionId": "q12",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 9357,
"outputTokens": 9,
"latencyMs": 2332.7022089999955
},
{
"questionId": "q12",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 8,
"latencyMs": 1986.9040000000023
},
{
"questionId": "q12",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 74,
"latencyMs": 3294.769625000001
},
{
"questionId": "q12",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 5757,
"outputTokens": 9,
"latencyMs": 1028.5119580000028
},
{
"questionId": "q12",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 8,
"latencyMs": 1788.622083000002
},
{
"questionId": "q13",
"format": "json",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 6388,
"outputTokens": 72,
"latencyMs": 2426.662333
},
{
"questionId": "q13",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 7868,
"outputTokens": 6,
"latencyMs": 1199.7499580000003
},
{
"questionId": "q13",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 7907,
"outputTokens": 6,
"latencyMs": 2230.200499999999
},
{
"questionId": "q13",
"format": "toon",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 2525,
"outputTokens": 72,
"latencyMs": 2973.9408330000006
},
{
"questionId": "q13",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 2980,
"outputTokens": 6,
"latencyMs": 1759.8231249999953
},
{
"questionId": "q13",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 3316,
"outputTokens": 6,
"latencyMs": 3236.040165999999
},
{
"questionId": "q13",
"format": "csv",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 2379,
"outputTokens": 72,
"latencyMs": 2829.9307920000065
},
{
"questionId": "q13",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 2854,
"outputTokens": 6,
"latencyMs": 905.942667000003
},
{
"questionId": "q13",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 3190,
"outputTokens": 6,
"latencyMs": 1492.0838749999966
},
{
"questionId": "q13",
"format": "xml",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 7355,
"outputTokens": 136,
"latencyMs": 3018.9516250000015
},
{
"questionId": "q13",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 9358,
"outputTokens": 6,
"latencyMs": 1010.1432910000003
},
{
"questionId": "q13",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 9096,
"outputTokens": 6,
"latencyMs": 2475.971083000004
},
{
"questionId": "q13",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 5010,
"outputTokens": 72,
"latencyMs": 2322.1169999999984
},
{
"questionId": "q13",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 5758,
"outputTokens": 6,
"latencyMs": 993.6942500000005
},
{
"questionId": "q13",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "145843",
"actual": "145843",
"isCorrect": true,
"inputTokens": 5742,
"outputTokens": 6,
"latencyMs": 2137.871124999998
},
{
"questionId": "q14",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 71,
"latencyMs": 2223.1494999999995
},
{
"questionId": "q14",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1101.960708999999
},
{
"questionId": "q14",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 1,
"latencyMs": 1264.4358330000032
},
{
"questionId": "q14",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 71,
"latencyMs": 3117.289082999996
},
{
"questionId": "q14",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 975.8156250000029
},
{
"questionId": "q14",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 1,
"latencyMs": 2076.140041999999
},
{
"questionId": "q14",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 71,
"latencyMs": 3522.6094999999987
},
{
"questionId": "q14",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 749.1067079999993
},
{
"questionId": "q14",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 1,
"latencyMs": 2162.154208
},
{
"questionId": "q14",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 135,
"latencyMs": 15105.717249999994
},
{
"questionId": "q14",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9358,
"outputTokens": 4,
"latencyMs": 1518.0794160000005
},
{
"questionId": "q14",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 1,
"latencyMs": 2634.745458999998
},
{
"questionId": "q14",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 71,
"latencyMs": 2809.990375000001
},
{
"questionId": "q14",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 2328.9382079999996
},
{
"questionId": "q14",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 1,
"latencyMs": 2122.7864169999957
},
{
"questionId": "q15",
"format": "json",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 140,
"latencyMs": 2744.6706660000054
},
{
"questionId": "q15",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 7869,
"outputTokens": 9,
"latencyMs": 1389.9784999999974
},
{
"questionId": "q15",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 8,
"latencyMs": 1310.762625000003
},
{
"questionId": "q15",
"format": "toon",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 204,
"latencyMs": 5402.840416999999
},
{
"questionId": "q15",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 9,
"latencyMs": 1480.7467909999978
},
{
"questionId": "q15",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 8,
"latencyMs": 1741.1184169999979
},
{
"questionId": "q15",
"format": "csv",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 140,
"latencyMs": 2192.0577909999993
},
{
"questionId": "q15",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 2855,
"outputTokens": 9,
"latencyMs": 1052.5672919999997
},
{
"questionId": "q15",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 8,
"latencyMs": 2969.6880840000013
},
{
"questionId": "q15",
"format": "xml",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 140,
"latencyMs": 4902.5039590000015
},
{
"questionId": "q15",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 9359,
"outputTokens": 9,
"latencyMs": 1337.9500409999964
},
{
"questionId": "q15",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 8,
"latencyMs": 988.1449579999971
},
{
"questionId": "q15",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 140,
"latencyMs": 5435.804457999999
},
{
"questionId": "q15",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 5759,
"outputTokens": 9,
"latencyMs": 1164.0297080000018
},
{
"questionId": "q15",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 8,
"latencyMs": 1684.5642079999961
},
{
"questionId": "q16",
"format": "json",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 72,
"latencyMs": 2137.3070000000007
},
{
"questionId": "q16",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1353.1784169999955
},
{
"questionId": "q16",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 5,
"latencyMs": 2152.076667000001
},
{
"questionId": "q16",
"format": "toon",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 72,
"latencyMs": 9838.444999999992
},
{
"questionId": "q16",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1011.8612080000021
},
{
"questionId": "q16",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 5,
"latencyMs": 2380.466207999998
},
{
"questionId": "q16",
"format": "csv",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 72,
"latencyMs": 2358.7515829999975
},
{
"questionId": "q16",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1073.5187089999963
},
{
"questionId": "q16",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 5,
"latencyMs": 1808.9837499999994
},
{
"questionId": "q16",
"format": "xml",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 200,
"latencyMs": 3657.137167000008
},
{
"questionId": "q16",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 9360,
"outputTokens": 6,
"latencyMs": 1216.3329169999997
},
{
"questionId": "q16",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 5,
"latencyMs": 2347.6749580000032
},
{
"questionId": "q16",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 136,
"latencyMs": 2985.761999999995
},
{
"questionId": "q16",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1062.5013749999998
},
{
"questionId": "q16",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "89436",
"actual": "89436",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 5,
"latencyMs": 2942.199041999993
},
{
"questionId": "q17",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6392,
"outputTokens": 71,
"latencyMs": 2072.9703750000044
},
{
"questionId": "q17",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7872,
"outputTokens": 4,
"latencyMs": 1143.0027499999997
},
{
"questionId": "q17",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7911,
"outputTokens": 1,
"latencyMs": 2339.718792000007
},
{
"questionId": "q17",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2529,
"outputTokens": 135,
"latencyMs": 2721.8648749999993
},
{
"questionId": "q17",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 4,
"latencyMs": 1106.3964160000032
},
{
"questionId": "q17",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3320,
"outputTokens": 1,
"latencyMs": 2453.6342910000094
},
{
"questionId": "q17",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2383,
"outputTokens": 135,
"latencyMs": 2526.1070829999953
},
{
"questionId": "q17",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2858,
"outputTokens": 4,
"latencyMs": 963.8103339999943
},
{
"questionId": "q17",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3194,
"outputTokens": 1,
"latencyMs": 1213.7454580000049
},
{
"questionId": "q17",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7359,
"outputTokens": 199,
"latencyMs": 3451.3691249999974
},
{
"questionId": "q17",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9362,
"outputTokens": 4,
"latencyMs": 1054.2650409999915
},
{
"questionId": "q17",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9100,
"outputTokens": 1,
"latencyMs": 1712.7362089999951
},
{
"questionId": "q17",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5014,
"outputTokens": 199,
"latencyMs": 4517.758332999991
},
{
"questionId": "q17",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5762,
"outputTokens": 4,
"latencyMs": 1036.0673749999987
},
{
"questionId": "q17",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5746,
"outputTokens": 1,
"latencyMs": 2099.134084000005
},
{
"questionId": "q18",
"format": "json",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 139,
"latencyMs": 3450.1222080000007
},
{
"questionId": "q18",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 10,
"latencyMs": 2320.022790999996
},
{
"questionId": "q18",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 8,
"latencyMs": 1058.7114589999983
},
{
"questionId": "q18",
"format": "toon",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 75,
"latencyMs": 3345.744040999998
},
{
"questionId": "q18",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 10,
"latencyMs": 1209.7132500000007
},
{
"questionId": "q18",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 8,
"latencyMs": 1716.227457999994
},
{
"questionId": "q18",
"format": "csv",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 139,
"latencyMs": 3093.9495000000024
},
{
"questionId": "q18",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 10,
"latencyMs": 1311.3692500000034
},
{
"questionId": "q18",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 8,
"latencyMs": 794.0660829999979
},
{
"questionId": "q18",
"format": "xml",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 459,
"latencyMs": 5397.067582999996
},
{
"questionId": "q18",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 10,
"latencyMs": 1179.005124999996
},
{
"questionId": "q18",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 8,
"latencyMs": 3390.3811669999996
},
{
"questionId": "q18",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 75,
"latencyMs": 3942.734500000006
},
{
"questionId": "q18",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 10,
"latencyMs": 1198.2199580000015
},
{
"questionId": "q18",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 8,
"latencyMs": 1988.9680829999998
},
{
"questionId": "q19",
"format": "json",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 200,
"latencyMs": 2964.017540999994
},
{
"questionId": "q19",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 7872,
"outputTokens": 6,
"latencyMs": 1171.257249999995
},
{
"questionId": "q19",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 6,
"latencyMs": 1304.4575840000034
},
{
"questionId": "q19",
"format": "toon",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 72,
"latencyMs": 3056.008249999999
},
{
"questionId": "q19",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 6,
"latencyMs": 873.7801659999968
},
{
"questionId": "q19",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 6,
"latencyMs": 1536.4943750000093
},
{
"questionId": "q19",
"format": "csv",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 328,
"latencyMs": 3966.832792000001
},
{
"questionId": "q19",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 2858,
"outputTokens": 6,
"latencyMs": 1072.791458000007
},
{
"questionId": "q19",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 6,
"latencyMs": 1334.2349169999943
},
{
"questionId": "q19",
"format": "xml",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 136,
"latencyMs": 2824.245167000001
},
{
"questionId": "q19",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 9362,
"outputTokens": 6,
"latencyMs": 1156.3476669999945
},
{
"questionId": "q19",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 6,
"latencyMs": 2503.603999999992
},
{
"questionId": "q19",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 72,
"latencyMs": 1988.6155419999996
},
{
"questionId": "q19",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 5762,
"outputTokens": 6,
"latencyMs": 2019.264417000013
},
{
"questionId": "q19",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "143365",
"actual": "143365",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 6,
"latencyMs": 2120.657042000006
},
{
"questionId": "q20",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 71,
"latencyMs": 2674.240417000008
},
{
"questionId": "q20",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 985.5821250000008
},
{
"questionId": "q20",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 1,
"latencyMs": 1005.9853749999893
},
{
"questionId": "q20",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 71,
"latencyMs": 2337.429165999987
},
{
"questionId": "q20",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1671.3083750000078
},
{
"questionId": "q20",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 1,
"latencyMs": 1858.936124999993
},
{
"questionId": "q20",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 71,
"latencyMs": 1797.8257500000036
},
{
"questionId": "q20",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1014.9593339999992
},
{
"questionId": "q20",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 1,
"latencyMs": 1534.200667000012
},
{
"questionId": "q20",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 135,
"latencyMs": 3340.923125000001
},
{
"questionId": "q20",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9358,
"outputTokens": 4,
"latencyMs": 1555.2516250000044
},
{
"questionId": "q20",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 1,
"latencyMs": 2945.7507919999916
},
{
"questionId": "q20",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 71,
"latencyMs": 3605.196708999996
},
{
"questionId": "q20",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1068.8147920000047
},
{
"questionId": "q20",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 1,
"latencyMs": 2330.3333749999874
},
{
"questionId": "q21",
"format": "json",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 6393,
"outputTokens": 75,
"latencyMs": 2723.754000000001
},
{
"questionId": "q21",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 7876,
"outputTokens": 9,
"latencyMs": 1170.7758329999924
},
{
"questionId": "q21",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 7912,
"outputTokens": 7,
"latencyMs": 2132.3265829999873
},
{
"questionId": "q21",
"format": "toon",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 2530,
"outputTokens": 139,
"latencyMs": 3074.613540999999
},
{
"questionId": "q21",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 2988,
"outputTokens": 9,
"latencyMs": 887.1294170000037
},
{
"questionId": "q21",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 3321,
"outputTokens": 7,
"latencyMs": 1689.1039579999924
},
{
"questionId": "q21",
"format": "csv",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 2384,
"outputTokens": 75,
"latencyMs": 2337.622915999993
},
{
"questionId": "q21",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 2862,
"outputTokens": 9,
"latencyMs": 951.0157920000056
},
{
"questionId": "q21",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 3195,
"outputTokens": 7,
"latencyMs": 2195.647125000003
},
{
"questionId": "q21",
"format": "xml",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 7360,
"outputTokens": 75,
"latencyMs": 2328.1204169999983
},
{
"questionId": "q21",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 9366,
"outputTokens": 9,
"latencyMs": 1225.2067499999976
},
{
"questionId": "q21",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 9101,
"outputTokens": 7,
"latencyMs": 1613.4727500000008
},
{
"questionId": "q21",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 5015,
"outputTokens": 75,
"latencyMs": 2482.4477909999987
},
{
"questionId": "q21",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 5766,
"outputTokens": 9,
"latencyMs": 1235.0746250000084
},
{
"questionId": "q21",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"isCorrect": true,
"inputTokens": 5747,
"outputTokens": 7,
"latencyMs": 4278.624791999988
},
{
"questionId": "q22",
"format": "json",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 6391,
"outputTokens": 136,
"latencyMs": 2741.065750000009
},
{
"questionId": "q22",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1172.1854580000072
},
{
"questionId": "q22",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 6,
"latencyMs": 1184.0355000000127
},
{
"questionId": "q22",
"format": "toon",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 2528,
"outputTokens": 136,
"latencyMs": 6348.677542000005
},
{
"questionId": "q22",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 964.3882920000033
},
{
"questionId": "q22",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 6,
"latencyMs": 1484.964082999999
},
{
"questionId": "q22",
"format": "csv",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 2382,
"outputTokens": 72,
"latencyMs": 23689.366624999995
},
{
"questionId": "q22",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1258.0295830000105
},
{
"questionId": "q22",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 6,
"latencyMs": 18510.087583
},
{
"questionId": "q22",
"format": "xml",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 7358,
"outputTokens": 136,
"latencyMs": 2856.495458000005
},
{
"questionId": "q22",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 6,
"latencyMs": 1031.8081669999956
},
{
"questionId": "q22",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 6,
"latencyMs": 2408.5496249999997
},
{
"questionId": "q22",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 5013,
"outputTokens": 72,
"latencyMs": 2405.9946670000063
},
{
"questionId": "q22",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1855.128291999994
},
{
"questionId": "q22",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "111314",
"actual": "111314",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 6,
"latencyMs": 14026.715166000009
},
{
"questionId": "q23",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6388,
"outputTokens": 71,
"latencyMs": 2613.9667920000065
},
{
"questionId": "q23",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 914.9832499999902
},
{
"questionId": "q23",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7907,
"outputTokens": 1,
"latencyMs": 17605.488457999993
},
{
"questionId": "q23",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2525,
"outputTokens": 455,
"latencyMs": 5491.203125
},
{
"questionId": "q23",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1559.9341249999998
},
{
"questionId": "q23",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3316,
"outputTokens": 1,
"latencyMs": 12204.927791999988
},
{
"questionId": "q23",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2379,
"outputTokens": 71,
"latencyMs": 4993.148166999992
},
{
"questionId": "q23",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1479.5367499999993
},
{
"questionId": "q23",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3190,
"outputTokens": 1,
"latencyMs": 2016.5271659999999
},
{
"questionId": "q23",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7355,
"outputTokens": 135,
"latencyMs": 3785.880541999999
},
{
"questionId": "q23",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9358,
"outputTokens": 4,
"latencyMs": 1170.9521249999962
},
{
"questionId": "q23",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9096,
"outputTokens": 1,
"latencyMs": 2376.3025000000052
},
{
"questionId": "q23",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5010,
"outputTokens": 71,
"latencyMs": 12974.991708999994
},
{
"questionId": "q23",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1062.6410830000095
},
{
"questionId": "q23",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5742,
"outputTokens": 1,
"latencyMs": 2375.1459170000016
},
{
"questionId": "q24",
"format": "json",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 331,
"latencyMs": 7831.431874999995
},
{
"questionId": "q24",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 7869,
"outputTokens": 10,
"latencyMs": 1169.4948749999894
},
{
"questionId": "q24",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 8,
"latencyMs": 6873.670041000005
},
{
"questionId": "q24",
"format": "toon",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 139,
"latencyMs": 2733.310750000004
},
{
"questionId": "q24",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 10,
"latencyMs": 1465.5957500000077
},
{
"questionId": "q24",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 8,
"latencyMs": 12162.723041999998
},
{
"questionId": "q24",
"format": "csv",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 203,
"latencyMs": 2401.237958999991
},
{
"questionId": "q24",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 2855,
"outputTokens": 10,
"latencyMs": 976.5733749999927
},
{
"questionId": "q24",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 8,
"latencyMs": 1773.305250000005
},
{
"questionId": "q24",
"format": "xml",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 395,
"latencyMs": 6293.676041999992
},
{
"questionId": "q24",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 9359,
"outputTokens": 10,
"latencyMs": 1263.188875000007
},
{
"questionId": "q24",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 8,
"latencyMs": 1866.224624999988
},
{
"questionId": "q24",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 75,
"latencyMs": 1734.0090409999975
},
{
"questionId": "q24",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 5759,
"outputTokens": 10,
"latencyMs": 1076.4865419999987
},
{
"questionId": "q24",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 8,
"latencyMs": 1799.7341250000027
},
{
"questionId": "q25",
"format": "json",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 6391,
"outputTokens": 136,
"latencyMs": 4268.888999999996
},
{
"questionId": "q25",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 7873,
"outputTokens": 6,
"latencyMs": 1100.426707999999
},
{
"questionId": "q25",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 7910,
"outputTokens": 5,
"latencyMs": 905.148000000001
},
{
"questionId": "q25",
"format": "toon",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 2528,
"outputTokens": 72,
"latencyMs": 3470.1760000000068
},
{
"questionId": "q25",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 6,
"latencyMs": 1239.0414170000004
},
{
"questionId": "q25",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 3319,
"outputTokens": 5,
"latencyMs": 3012.1026249999995
},
{
"questionId": "q25",
"format": "csv",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 2382,
"outputTokens": 72,
"latencyMs": 4932.565208
},
{
"questionId": "q25",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 2859,
"outputTokens": 6,
"latencyMs": 923.8483330000017
},
{
"questionId": "q25",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 3193,
"outputTokens": 5,
"latencyMs": 1677.830792000008
},
{
"questionId": "q25",
"format": "xml",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 7358,
"outputTokens": 200,
"latencyMs": 4701.415708
},
{
"questionId": "q25",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 9363,
"outputTokens": 6,
"latencyMs": 1366.9058340000047
},
{
"questionId": "q25",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 9099,
"outputTokens": 5,
"latencyMs": 1693.0314170000056
},
{
"questionId": "q25",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 5013,
"outputTokens": 136,
"latencyMs": 5666.829292000009
},
{
"questionId": "q25",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 5763,
"outputTokens": 6,
"latencyMs": 1181.8469999999943
},
{
"questionId": "q25",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "89553",
"actual": "89553",
"isCorrect": true,
"inputTokens": 5745,
"outputTokens": 5,
"latencyMs": 2083.4975829999894
},
{
"questionId": "q26",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6388,
"outputTokens": 71,
"latencyMs": 2986.76112499999
},
{
"questionId": "q26",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7866,
"outputTokens": 4,
"latencyMs": 1736.9273340000072
},
{
"questionId": "q26",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7907,
"outputTokens": 1,
"latencyMs": 1777.5319579999923
},
{
"questionId": "q26",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2525,
"outputTokens": 71,
"latencyMs": 2717.0237919999927
},
{
"questionId": "q26",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2978,
"outputTokens": 4,
"latencyMs": 874.0303339999955
},
{
"questionId": "q26",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3316,
"outputTokens": 1,
"latencyMs": 5675.357959000001
},
{
"questionId": "q26",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2379,
"outputTokens": 71,
"latencyMs": 3198.773958000005
},
{
"questionId": "q26",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2852,
"outputTokens": 4,
"latencyMs": 1085.409707999992
},
{
"questionId": "q26",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3190,
"outputTokens": 1,
"latencyMs": 1932.898749999993
},
{
"questionId": "q26",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7355,
"outputTokens": 135,
"latencyMs": 4096.534249999997
},
{
"questionId": "q26",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9356,
"outputTokens": 4,
"latencyMs": 1258.4983749999956
},
{
"questionId": "q26",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9096,
"outputTokens": 1,
"latencyMs": 2413.0945409999986
},
{
"questionId": "q26",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5010,
"outputTokens": 71,
"latencyMs": 3148.736499999999
},
{
"questionId": "q26",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5756,
"outputTokens": 4,
"latencyMs": 1131.4892499999987
},
{
"questionId": "q26",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5742,
"outputTokens": 1,
"latencyMs": 1526.3339579999883
},
{
"questionId": "q27",
"format": "json",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 6391,
"outputTokens": 142,
"latencyMs": 2969.5719580000004
},
{
"questionId": "q27",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 14,
"latencyMs": 2196.764500000005
},
{
"questionId": "q27",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 7910,
"outputTokens": 12,
"latencyMs": 1040.4618750000081
},
{
"questionId": "q27",
"format": "toon",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 2528,
"outputTokens": 78,
"latencyMs": 3091.4898329999996
},
{
"questionId": "q27",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 14,
"latencyMs": 1001.9885000000068
},
{
"questionId": "q27",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 3319,
"outputTokens": 12,
"latencyMs": 3467.2665410000045
},
{
"questionId": "q27",
"format": "csv",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 2382,
"outputTokens": 78,
"latencyMs": 5917.028874999989
},
{
"questionId": "q27",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 14,
"latencyMs": 1305.7503750000033
},
{
"questionId": "q27",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 3193,
"outputTokens": 12,
"latencyMs": 2613.1883329999982
},
{
"questionId": "q27",
"format": "xml",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 7358,
"outputTokens": 142,
"latencyMs": 2786.5942090000026
},
{
"questionId": "q27",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 14,
"latencyMs": 2270.722458999997
},
{
"questionId": "q27",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 9099,
"outputTokens": 12,
"latencyMs": 1157.144708000007
},
{
"questionId": "q27",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 5013,
"outputTokens": 142,
"latencyMs": 3469.4895829999878
},
{
"questionId": "q27",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 14,
"latencyMs": 1359.8917079999956
},
{
"questionId": "q27",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"isCorrect": true,
"inputTokens": 5745,
"outputTokens": 12,
"latencyMs": 2318.6192080000037
},
{
"questionId": "q28",
"format": "json",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 136,
"latencyMs": 4774.099707999994
},
{
"questionId": "q28",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1098.6865830000024
},
{
"questionId": "q28",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 6,
"latencyMs": 1239.2771659999999
},
{
"questionId": "q28",
"format": "toon",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 136,
"latencyMs": 5861.847667000009
},
{
"questionId": "q28",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1297.473874999996
},
{
"questionId": "q28",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 6,
"latencyMs": 1698.9040830000013
},
{
"questionId": "q28",
"format": "csv",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 72,
"latencyMs": 7521.450750000004
},
{
"questionId": "q28",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 989.1705420000071
},
{
"questionId": "q28",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 6,
"latencyMs": 1598.6000829999975
},
{
"questionId": "q28",
"format": "xml",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 136,
"latencyMs": 4121.990666000012
},
{
"questionId": "q28",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 6,
"latencyMs": 1153.3577499999956
},
{
"questionId": "q28",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 6,
"latencyMs": 5119.164292000001
},
{
"questionId": "q28",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 136,
"latencyMs": 5101.831541000007
},
{
"questionId": "q28",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1048.2691250000062
},
{
"questionId": "q28",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "104053",
"actual": "104053",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 6,
"latencyMs": 2109.3487500000047
},
{
"questionId": "q29",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6391,
"outputTokens": 135,
"latencyMs": 3792.2222499999916
},
{
"questionId": "q29",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7872,
"outputTokens": 4,
"latencyMs": 1203.301084000006
},
{
"questionId": "q29",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7910,
"outputTokens": 1,
"latencyMs": 1963.9974580000126
},
{
"questionId": "q29",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2528,
"outputTokens": 135,
"latencyMs": 3127.7867909999914
},
{
"questionId": "q29",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 4,
"latencyMs": 1192.564333000002
},
{
"questionId": "q29",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3319,
"outputTokens": 1,
"latencyMs": 2034.2360419999895
},
{
"questionId": "q29",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2382,
"outputTokens": 71,
"latencyMs": 2648.283917000008
},
{
"questionId": "q29",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2858,
"outputTokens": 4,
"latencyMs": 902.732290999993
},
{
"questionId": "q29",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3193,
"outputTokens": 1,
"latencyMs": 2174.387124999994
},
{
"questionId": "q29",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7358,
"outputTokens": 71,
"latencyMs": 2300.0212080000056
},
{
"questionId": "q29",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9362,
"outputTokens": 4,
"latencyMs": 963.8994999999995
},
{
"questionId": "q29",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9099,
"outputTokens": 1,
"latencyMs": 4195.405083000005
},
{
"questionId": "q29",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5013,
"outputTokens": 135,
"latencyMs": 3398.262333999999
},
{
"questionId": "q29",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5762,
"outputTokens": 4,
"latencyMs": 1032.8332079999964
},
{
"questionId": "q29",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5745,
"outputTokens": 1,
"latencyMs": 2265.614916999999
},
{
"questionId": "q30",
"format": "json",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 76,
"latencyMs": 2575.189624999999
},
{
"questionId": "q30",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 7869,
"outputTokens": 12,
"latencyMs": 1003.463208000001
},
{
"questionId": "q30",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 9,
"latencyMs": 1218.547916999989
},
{
"questionId": "q30",
"format": "toon",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 76,
"latencyMs": 17850.385834000015
},
{
"questionId": "q30",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 12,
"latencyMs": 1060.4747919999936
},
{
"questionId": "q30",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 9,
"latencyMs": 2927.220583000002
},
{
"questionId": "q30",
"format": "csv",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 140,
"latencyMs": 2492.920542000007
},
{
"questionId": "q30",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 2855,
"outputTokens": 12,
"latencyMs": 1167.4384590000118
},
{
"questionId": "q30",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 9,
"latencyMs": 1760.1724159999867
},
{
"questionId": "q30",
"format": "xml",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 76,
"latencyMs": 2586.2806249999994
},
{
"questionId": "q30",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 9359,
"outputTokens": 12,
"latencyMs": 1827.6337499999936
},
{
"questionId": "q30",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 9,
"latencyMs": 1985.0590000000084
},
{
"questionId": "q30",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 76,
"latencyMs": 2150.4795000000013
},
{
"questionId": "q30",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 5759,
"outputTokens": 12,
"latencyMs": 1151.3658339999965
},
{
"questionId": "q30",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 9,
"latencyMs": 2104.947874999998
},
{
"questionId": "q31",
"format": "json",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 6393,
"outputTokens": 136,
"latencyMs": 2204.857333000007
},
{
"questionId": "q31",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 7874,
"outputTokens": 6,
"latencyMs": 1366.9736249999987
},
{
"questionId": "q31",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 7911,
"outputTokens": 6,
"latencyMs": 1108.5303330000024
},
{
"questionId": "q31",
"format": "toon",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 2530,
"outputTokens": 136,
"latencyMs": 2809.3447089999972
},
{
"questionId": "q31",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 2986,
"outputTokens": 6,
"latencyMs": 985.2792080000072
},
{
"questionId": "q31",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 3320,
"outputTokens": 6,
"latencyMs": 1869.5062499999913
},
{
"questionId": "q31",
"format": "csv",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 2384,
"outputTokens": 136,
"latencyMs": 2816.2447910000046
},
{
"questionId": "q31",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 2860,
"outputTokens": 6,
"latencyMs": 1038.263666999992
},
{
"questionId": "q31",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 3194,
"outputTokens": 6,
"latencyMs": 1011.8830000000016
},
{
"questionId": "q31",
"format": "xml",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 7360,
"outputTokens": 200,
"latencyMs": 2650.324915999983
},
{
"questionId": "q31",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 9364,
"outputTokens": 6,
"latencyMs": 1139.189167000004
},
{
"questionId": "q31",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 9100,
"outputTokens": 6,
"latencyMs": 1773.4112920000043
},
{
"questionId": "q31",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 5015,
"outputTokens": 136,
"latencyMs": 2481.3391249999986
},
{
"questionId": "q31",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 5764,
"outputTokens": 6,
"latencyMs": 1290.1707079999906
},
{
"questionId": "q31",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "142029",
"actual": "142029",
"isCorrect": true,
"inputTokens": 5746,
"outputTokens": 6,
"latencyMs": 2289.944292
},
{
"questionId": "q32",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 135,
"latencyMs": 4142.8067919999885
},
{
"questionId": "q32",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1067.801999999996
},
{
"questionId": "q32",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 1,
"latencyMs": 1057.6598330000124
},
{
"questionId": "q32",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 135,
"latencyMs": 2198.369875000004
},
{
"questionId": "q32",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 1228.235249999998
},
{
"questionId": "q32",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 1,
"latencyMs": 2113.6464160000032
},
{
"questionId": "q32",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 135,
"latencyMs": 2331.9615420000046
},
{
"questionId": "q32",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 1010.4068330000155
},
{
"questionId": "q32",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 1,
"latencyMs": 1529.0002080000122
},
{
"questionId": "q32",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 199,
"latencyMs": 4986.682375000004
},
{
"questionId": "q32",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9359,
"outputTokens": 4,
"latencyMs": 1295.2261669999862
},
{
"questionId": "q32",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 1,
"latencyMs": 2608.518458000006
},
{
"questionId": "q32",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 71,
"latencyMs": 1683.7294159999874
},
{
"questionId": "q32",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1466.112374999997
},
{
"questionId": "q32",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 1,
"latencyMs": 2186.13829100001
},
{
"questionId": "q33",
"format": "json",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 6393,
"outputTokens": 204,
"latencyMs": 4101.640291000018
},
{
"questionId": "q33",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 7872,
"outputTokens": 14,
"latencyMs": 1355.6347499999974
},
{
"questionId": "q33",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 7911,
"outputTokens": 9,
"latencyMs": 1218.3612080000166
},
{
"questionId": "q33",
"format": "toon",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 2530,
"outputTokens": 140,
"latencyMs": 2800.1185839999816
},
{
"questionId": "q33",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 14,
"latencyMs": 1477.837124999991
},
{
"questionId": "q33",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 3320,
"outputTokens": 9,
"latencyMs": 1545.5144169999985
},
{
"questionId": "q33",
"format": "csv",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 2384,
"outputTokens": 76,
"latencyMs": 3839.476958000014
},
{
"questionId": "q33",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 2858,
"outputTokens": 14,
"latencyMs": 1138.701000000001
},
{
"questionId": "q33",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 3194,
"outputTokens": 9,
"latencyMs": 928.7706250000047
},
{
"questionId": "q33",
"format": "xml",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 7360,
"outputTokens": 140,
"latencyMs": 2666.2794580000045
},
{
"questionId": "q33",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 9362,
"outputTokens": 14,
"latencyMs": 2169.680166999984
},
{
"questionId": "q33",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 9100,
"outputTokens": 9,
"latencyMs": 1705.846458999993
},
{
"questionId": "q33",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 5015,
"outputTokens": 76,
"latencyMs": 2263.530958999996
},
{
"questionId": "q33",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 5762,
"outputTokens": 14,
"latencyMs": 1402.7602079999924
},
{
"questionId": "q33",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"isCorrect": true,
"inputTokens": 5746,
"outputTokens": 9,
"latencyMs": 2376.068292000011
},
{
"questionId": "q34",
"format": "json",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 6391,
"outputTokens": 72,
"latencyMs": 2438.071291
},
{
"questionId": "q34",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1119.892125000013
},
{
"questionId": "q34",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 7910,
"outputTokens": 5,
"latencyMs": 1219.9752500000177
},
{
"questionId": "q34",
"format": "toon",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 2528,
"outputTokens": 136,
"latencyMs": 3074.212375000003
},
{
"questionId": "q34",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1182.489499999996
},
{
"questionId": "q34",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 3319,
"outputTokens": 5,
"latencyMs": 2366.0734999999986
},
{
"questionId": "q34",
"format": "csv",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 2382,
"outputTokens": 72,
"latencyMs": 3682.4087500000023
},
{
"questionId": "q34",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 865.8139159999846
},
{
"questionId": "q34",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 3193,
"outputTokens": 5,
"latencyMs": 1594.2567079999717
},
{
"questionId": "q34",
"format": "xml",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 7358,
"outputTokens": 200,
"latencyMs": 9620.968290999997
},
{
"questionId": "q34",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 6,
"latencyMs": 1066.5026659999858
},
{
"questionId": "q34",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 9099,
"outputTokens": 5,
"latencyMs": 2701.866624999995
},
{
"questionId": "q34",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 5013,
"outputTokens": 136,
"latencyMs": 3559.778957999981
},
{
"questionId": "q34",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1008.4788750000007
},
{
"questionId": "q34",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "84650",
"actual": "84650",
"isCorrect": true,
"inputTokens": 5745,
"outputTokens": 5,
"latencyMs": 1889.822375000018
},
{
"questionId": "q35",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 71,
"latencyMs": 3083.3981669999775
},
{
"questionId": "q35",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 4,
"latencyMs": 1060.2027909999888
},
{
"questionId": "q35",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 1,
"latencyMs": 1432.9026670000167
},
{
"questionId": "q35",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 71,
"latencyMs": 2827.286916000012
},
{
"questionId": "q35",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 4,
"latencyMs": 1606.289208000002
},
{
"questionId": "q35",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 1,
"latencyMs": 1781.2257079999836
},
{
"questionId": "q35",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 135,
"latencyMs": 2855.722792000015
},
{
"questionId": "q35",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2857,
"outputTokens": 4,
"latencyMs": 1140.299874999997
},
{
"questionId": "q35",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 1,
"latencyMs": 2195.365832999989
},
{
"questionId": "q35",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 135,
"latencyMs": 2904.48324999999
},
{
"questionId": "q35",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 4,
"latencyMs": 1264.2794160000049
},
{
"questionId": "q35",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 1,
"latencyMs": 3598.464708000014
},
{
"questionId": "q35",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 71,
"latencyMs": 2646.219666000019
},
{
"questionId": "q35",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 4,
"latencyMs": 1090.8027500000026
},
{
"questionId": "q35",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 1,
"latencyMs": 2322.022082999989
},
{
"questionId": "q36",
"format": "json",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 78,
"latencyMs": 2498.7566669999796
},
{
"questionId": "q36",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 7869,
"outputTokens": 14,
"latencyMs": 1563.026332999987
},
{
"questionId": "q36",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 11,
"latencyMs": 1062.8037919999915
},
{
"questionId": "q36",
"format": "toon",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 590,
"latencyMs": 9420.16175
},
{
"questionId": "q36",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 14,
"latencyMs": 1038.3448750000098
},
{
"questionId": "q36",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 11,
"latencyMs": 3468.648833000014
},
{
"questionId": "q36",
"format": "csv",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 142,
"latencyMs": 3061.706208000018
},
{
"questionId": "q36",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 2855,
"outputTokens": 14,
"latencyMs": 1053.0741669999843
},
{
"questionId": "q36",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 11,
"latencyMs": 1576.9219160000212
},
{
"questionId": "q36",
"format": "xml",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 78,
"latencyMs": 1889.579624999984
},
{
"questionId": "q36",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 9359,
"outputTokens": 14,
"latencyMs": 1520.9462920000078
},
{
"questionId": "q36",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 11,
"latencyMs": 1917.4184999999998
},
{
"questionId": "q36",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 142,
"latencyMs": 4630.122166999994
},
{
"questionId": "q36",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 5759,
"outputTokens": 14,
"latencyMs": 1646.354083000013
},
{
"questionId": "q36",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 11,
"latencyMs": 2197.673375000013
},
{
"questionId": "q37",
"format": "json",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 72,
"latencyMs": 3646.0600829999894
},
{
"questionId": "q37",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 7868,
"outputTokens": 6,
"latencyMs": 1356.2343330000003
},
{
"questionId": "q37",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 5,
"latencyMs": 735.1860419999866
},
{
"questionId": "q37",
"format": "toon",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 136,
"latencyMs": 2701.791499999992
},
{
"questionId": "q37",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 2980,
"outputTokens": 6,
"latencyMs": 1259.3909169999824
},
{
"questionId": "q37",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 5,
"latencyMs": 1960.7033339999907
},
{
"questionId": "q37",
"format": "csv",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 72,
"latencyMs": 5573.357083999988
},
{
"questionId": "q37",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 2854,
"outputTokens": 6,
"latencyMs": 1284.3673750000016
},
{
"questionId": "q37",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 5,
"latencyMs": 2050.5506659999955
},
{
"questionId": "q37",
"format": "xml",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 136,
"latencyMs": 3253.602791000012
},
{
"questionId": "q37",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 9358,
"outputTokens": 6,
"latencyMs": 1146.329166999989
},
{
"questionId": "q37",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 5,
"latencyMs": 2395.673125000001
},
{
"questionId": "q37",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 72,
"latencyMs": 2913.434957999998
},
{
"questionId": "q37",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 5758,
"outputTokens": 6,
"latencyMs": 2243.595874999999
},
{
"questionId": "q37",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "89773",
"actual": "89773",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 5,
"latencyMs": 1839.661374999996
},
{
"questionId": "q38",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 135,
"latencyMs": 2779.79579199999
},
{
"questionId": "q38",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1133.7338750000054
},
{
"questionId": "q38",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 1,
"latencyMs": 774.6977079999924
},
{
"questionId": "q38",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 71,
"latencyMs": 4311.999750000017
},
{
"questionId": "q38",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 2223.9427499999874
},
{
"questionId": "q38",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 1,
"latencyMs": 2975.953125
},
{
"questionId": "q38",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 71,
"latencyMs": 4617.852291999996
},
{
"questionId": "q38",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1096.2197500000184
},
{
"questionId": "q38",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 1,
"latencyMs": 2754.3287919999857
},
{
"questionId": "q38",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 135,
"latencyMs": 3539.3821250000037
},
{
"questionId": "q38",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9358,
"outputTokens": 4,
"latencyMs": 1369.516082999995
},
{
"questionId": "q38",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 1,
"latencyMs": 2677.958791000012
},
{
"questionId": "q38",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 71,
"latencyMs": 2209.974041999987
},
{
"questionId": "q38",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1352.3056670000078
},
{
"questionId": "q38",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Marketing",
"actual": "Marketing",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 1,
"latencyMs": 2126.258208000014
},
{
"questionId": "q39",
"format": "json",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 6389,
"outputTokens": 207,
"latencyMs": 3999.7677079999994
},
{
"questionId": "q39",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 7869,
"outputTokens": 13,
"latencyMs": 1170.8554579999764
},
{
"questionId": "q39",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 7908,
"outputTokens": 10,
"latencyMs": 1278.5721670000057
},
{
"questionId": "q39",
"format": "toon",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 2526,
"outputTokens": 143,
"latencyMs": 3334.013791000005
},
{
"questionId": "q39",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 13,
"latencyMs": 1115.4245419999934
},
{
"questionId": "q39",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 3317,
"outputTokens": 10,
"latencyMs": 2555.918707999983
},
{
"questionId": "q39",
"format": "csv",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 2380,
"outputTokens": 143,
"latencyMs": 2100.1043329999957
},
{
"questionId": "q39",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 2855,
"outputTokens": 13,
"latencyMs": 1298.810999999987
},
{
"questionId": "q39",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 3191,
"outputTokens": 10,
"latencyMs": 1940.2669170000008
},
{
"questionId": "q39",
"format": "xml",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 7356,
"outputTokens": 143,
"latencyMs": 2666.5189580000006
},
{
"questionId": "q39",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 9359,
"outputTokens": 13,
"latencyMs": 1611.7814170000202
},
{
"questionId": "q39",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 9097,
"outputTokens": 10,
"latencyMs": 1709.3350419999915
},
{
"questionId": "q39",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 5011,
"outputTokens": 143,
"latencyMs": 4774.929042000003
},
{
"questionId": "q39",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 5759,
"outputTokens": 13,
"latencyMs": 1369.8504160000011
},
{
"questionId": "q39",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"isCorrect": true,
"inputTokens": 5743,
"outputTokens": 10,
"latencyMs": 3123.9857920000213
},
{
"questionId": "q40",
"format": "json",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 6390,
"outputTokens": 72,
"latencyMs": 2700.2800830000197
},
{
"questionId": "q40",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1145.983292000019
},
{
"questionId": "q40",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 7909,
"outputTokens": 5,
"latencyMs": 952.1742089999898
},
{
"questionId": "q40",
"format": "toon",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 2527,
"outputTokens": 72,
"latencyMs": 2220.3111250000075
},
{
"questionId": "q40",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 981.9718339999963
},
{
"questionId": "q40",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 3318,
"outputTokens": 5,
"latencyMs": 2079.9035830000066
},
{
"questionId": "q40",
"format": "csv",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 2381,
"outputTokens": 136,
"latencyMs": 2519.2579590000096
},
{
"questionId": "q40",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "144426",
"isCorrect": false,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 942.0043329999899
},
{
"questionId": "q40",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 3192,
"outputTokens": 5,
"latencyMs": 1683.0637080000015
},
{
"questionId": "q40",
"format": "xml",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 7357,
"outputTokens": 72,
"latencyMs": 2190.1603750000068
},
{
"questionId": "q40",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 9361,
"outputTokens": 6,
"latencyMs": 1771.8361250000016
},
{
"questionId": "q40",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 9098,
"outputTokens": 5,
"latencyMs": 2376.372875000001
},
{
"questionId": "q40",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 5012,
"outputTokens": 72,
"latencyMs": 2355.175791000016
},
{
"questionId": "q40",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1192.191541999986
},
{
"questionId": "q40",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "49741",
"actual": "49741",
"isCorrect": true,
"inputTokens": 5744,
"outputTokens": 5,
"latencyMs": 2328.137166999979
},
{
"questionId": "q41",
"format": "json",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 6387,
"outputTokens": 775,
"latencyMs": 11132.566209000011
},
{
"questionId": "q41",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1048.9463749999995
},
{
"questionId": "q41",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "13",
"isCorrect": false,
"inputTokens": 7906,
"outputTokens": 2,
"latencyMs": 954.9381670000148
},
{
"questionId": "q41",
"format": "toon",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 2524,
"outputTokens": 583,
"latencyMs": 5343.168333000009
},
{
"questionId": "q41",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 929.4576249999809
},
{
"questionId": "q41",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 3315,
"outputTokens": 2,
"latencyMs": 1230.1574160000018
},
{
"questionId": "q41",
"format": "csv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 2378,
"outputTokens": 1415,
"latencyMs": 16158.150375000027
},
{
"questionId": "q41",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 932.4995000000054
},
{
"questionId": "q41",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "14",
"isCorrect": false,
"inputTokens": 3189,
"outputTokens": 2,
"latencyMs": 1859.355958
},
{
"questionId": "q41",
"format": "xml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 7354,
"outputTokens": 903,
"latencyMs": 11415.376208000001
},
{
"questionId": "q41",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 9355,
"outputTokens": 5,
"latencyMs": 1198.3916249999893
},
{
"questionId": "q41",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 9095,
"outputTokens": 2,
"latencyMs": 3497.0485409999965
},
{
"questionId": "q41",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 5009,
"outputTokens": 1031,
"latencyMs": 10859.450207999995
},
{
"questionId": "q41",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 2038.0866250000254
},
{
"questionId": "q41",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "13",
"isCorrect": false,
"inputTokens": 5741,
"outputTokens": 2,
"latencyMs": 1642.4759159999958
},
{
"questionId": "q42",
"format": "json",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 6387,
"outputTokens": 1031,
"latencyMs": 11081.197666000022
},
{
"questionId": "q42",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1095.9497919999994
},
{
"questionId": "q42",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 7906,
"outputTokens": 2,
"latencyMs": 1309.7017500000075
},
{
"questionId": "q42",
"format": "toon",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 2524,
"outputTokens": 711,
"latencyMs": 9064.612916999991
},
{
"questionId": "q42",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "14",
"isCorrect": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1045.4045000000042
},
{
"questionId": "q42",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 3315,
"outputTokens": 2,
"latencyMs": 2056.116624999995
},
{
"questionId": "q42",
"format": "csv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 2378,
"outputTokens": 967,
"latencyMs": 8423.070084000006
},
{
"questionId": "q42",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 901.4683749999967
},
{
"questionId": "q42",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "14",
"isCorrect": false,
"inputTokens": 3189,
"outputTokens": 2,
"latencyMs": 2192.902625000017
},
{
"questionId": "q42",
"format": "xml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 7354,
"outputTokens": 647,
"latencyMs": 9821.846875000017
},
{
"questionId": "q42",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 9355,
"outputTokens": 5,
"latencyMs": 1586.0259169999918
},
{
"questionId": "q42",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 9095,
"outputTokens": 2,
"latencyMs": 9515.369042000006
},
{
"questionId": "q42",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 5009,
"outputTokens": 455,
"latencyMs": 5076.419125000015
},
{
"questionId": "q42",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1472.8408340000024
},
{
"questionId": "q42",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 5741,
"outputTokens": 2,
"latencyMs": 865.6228749999718
},
{
"questionId": "q43",
"format": "json",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 6387,
"outputTokens": 775,
"latencyMs": 8729.67633300001
},
{
"questionId": "q43",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1217.0473749999946
},
{
"questionId": "q43",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 7906,
"outputTokens": 2,
"latencyMs": 1158.2075419999892
},
{
"questionId": "q43",
"format": "toon",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 2524,
"outputTokens": 775,
"latencyMs": 6998.693750000006
},
{
"questionId": "q43",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1640.0182080000232
},
{
"questionId": "q43",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "14",
"isCorrect": false,
"inputTokens": 3315,
"outputTokens": 2,
"latencyMs": 947.1101670000062
},
{
"questionId": "q43",
"format": "csv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 2378,
"outputTokens": 583,
"latencyMs": 13248.978291000007
},
{
"questionId": "q43",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 836.4533340000198
},
{
"questionId": "q43",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 3189,
"outputTokens": 2,
"latencyMs": 818.1433329999854
},
{
"questionId": "q43",
"format": "xml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 7354,
"outputTokens": 1095,
"latencyMs": 9890.235916000005
},
{
"questionId": "q43",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 9355,
"outputTokens": 5,
"latencyMs": 1320.4134170000034
},
{
"questionId": "q43",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 9095,
"outputTokens": 2,
"latencyMs": 4225.577166000003
},
{
"questionId": "q43",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 5009,
"outputTokens": 1031,
"latencyMs": 13344.171333000006
},
{
"questionId": "q43",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 863.8359160000109
},
{
"questionId": "q43",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 5741,
"outputTokens": 2,
"latencyMs": 1194.4381250000151
},
{
"questionId": "q44",
"format": "json",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 6387,
"outputTokens": 455,
"latencyMs": 5239.934833000007
},
{
"questionId": "q44",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1124.6063330000034
},
{
"questionId": "q44",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "14",
"isCorrect": false,
"inputTokens": 7906,
"outputTokens": 2,
"latencyMs": 1525.701040999993
},
{
"questionId": "q44",
"format": "toon",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 2524,
"outputTokens": 519,
"latencyMs": 6195.039833999996
},
{
"questionId": "q44",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 891.0962500000023
},
{
"questionId": "q44",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "13",
"isCorrect": false,
"inputTokens": 3315,
"outputTokens": 2,
"latencyMs": 1322.2949580000131
},
{
"questionId": "q44",
"format": "csv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 2378,
"outputTokens": 1543,
"latencyMs": 16353.942624999996
},
{
"questionId": "q44",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 861.9590829999943
},
{
"questionId": "q44",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "13",
"isCorrect": false,
"inputTokens": 3189,
"outputTokens": 2,
"latencyMs": 912.1500829999859
},
{
"questionId": "q44",
"format": "xml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 7354,
"outputTokens": 519,
"latencyMs": 6838.317749999987
},
{
"questionId": "q44",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 9355,
"outputTokens": 5,
"latencyMs": 1875.6236249999783
},
{
"questionId": "q44",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "13",
"isCorrect": false,
"inputTokens": 9095,
"outputTokens": 2,
"latencyMs": 1482.7477500000095
},
{
"questionId": "q44",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 5009,
"outputTokens": 1223,
"latencyMs": 13887.709959
},
{
"questionId": "q44",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"isCorrect": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1135.573457999999
},
{
"questionId": "q44",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "17",
"actual": "17",
"isCorrect": true,
"inputTokens": 5741,
"outputTokens": 2,
"latencyMs": 1063.958209000004
},
{
"questionId": "q45",
"format": "json",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 6387,
"outputTokens": 903,
"latencyMs": 11372.731792000006
},
{
"questionId": "q45",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"isCorrect": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1085.2727500000037
},
{
"questionId": "q45",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "14",
"isCorrect": false,
"inputTokens": 7906,
"outputTokens": 2,
"latencyMs": 788.761582999985
},
{
"questionId": "q45",
"format": "toon",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 2524,
"outputTokens": 775,
"latencyMs": 9670.953584000003
},
{
"questionId": "q45",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"isCorrect": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1307.5495419999934
},
{
"questionId": "q45",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "17",
"isCorrect": false,
"inputTokens": 3315,
"outputTokens": 2,
"latencyMs": 1034.7324580000131
},
{
"questionId": "q45",
"format": "csv",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 2378,
"outputTokens": 647,
"latencyMs": 7079.23558399998
},
{
"questionId": "q45",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"isCorrect": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1123.2897499999963
},
{
"questionId": "q45",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "13",
"isCorrect": false,
"inputTokens": 3189,
"outputTokens": 2,
"latencyMs": 1318.0012920000008
},
{
"questionId": "q45",
"format": "xml",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 7354,
"outputTokens": 583,
"latencyMs": 5795.2639590000035
},
{
"questionId": "q45",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"isCorrect": false,
"inputTokens": 9355,
"outputTokens": 5,
"latencyMs": 1125.9925829999847
},
{
"questionId": "q45",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 9095,
"outputTokens": 2,
"latencyMs": 8305.401042000012
},
{
"questionId": "q45",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 5009,
"outputTokens": 839,
"latencyMs": 10189.432124999992
},
{
"questionId": "q45",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"isCorrect": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1615.4580000000133
},
{
"questionId": "q45",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "10",
"isCorrect": false,
"inputTokens": 5741,
"outputTokens": 2,
"latencyMs": 1533.5138750000042
},
{
"questionId": "q46",
"format": "json",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 6387,
"outputTokens": 519,
"latencyMs": 7169.378540999984
},
{
"questionId": "q46",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "10",
"isCorrect": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1133.9953749999986
},
{
"questionId": "q46",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "15",
"isCorrect": false,
"inputTokens": 7906,
"outputTokens": 2,
"latencyMs": 1018.8396669999929
},
{
"questionId": "q46",
"format": "toon",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 2524,
"outputTokens": 647,
"latencyMs": 6637.351416999998
},
{
"questionId": "q46",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"isCorrect": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 864.9015839999774
},
{
"questionId": "q46",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "17",
"isCorrect": false,
"inputTokens": 3315,
"outputTokens": 2,
"latencyMs": 992.5710419999959
},
{
"questionId": "q46",
"format": "csv",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 2378,
"outputTokens": 839,
"latencyMs": 7426.826874999999
},
{
"questionId": "q46",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"isCorrect": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 893.4481660000165
},
{
"questionId": "q46",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "13",
"isCorrect": false,
"inputTokens": 3189,
"outputTokens": 2,
"latencyMs": 1200.8498329999857
},
{
"questionId": "q46",
"format": "xml",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 7354,
"outputTokens": 775,
"latencyMs": 8865.971332999994
},
{
"questionId": "q46",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "10",
"isCorrect": false,
"inputTokens": 9355,
"outputTokens": 5,
"latencyMs": 1491.2856249999895
},
{
"questionId": "q46",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "17",
"isCorrect": false,
"inputTokens": 9095,
"outputTokens": 2,
"latencyMs": 1216.2892920000013
},
{
"questionId": "q46",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"isCorrect": true,
"inputTokens": 5009,
"outputTokens": 839,
"latencyMs": 9403.812124999997
},
{
"questionId": "q46",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"isCorrect": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1126.5797500000044
},
{
"questionId": "q46",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "16",
"actual": "17",
"isCorrect": false,
"inputTokens": 5741,
"outputTokens": 2,
"latencyMs": 1671.0382089999912
},
{
"questionId": "q47",
"format": "json",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"isCorrect": true,
"inputTokens": 6392,
"outputTokens": 1671,
"latencyMs": 15363.507083999983
},
{
"questionId": "q47",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"isCorrect": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1189.3042910000077
},
{
"questionId": "q47",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "91",
"actual": "90",
"isCorrect": false,
"inputTokens": 7914,
"outputTokens": 2,
"latencyMs": 1651.3950829999812
},
{
"questionId": "q47",
"format": "toon",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"isCorrect": true,
"inputTokens": 2529,
"outputTokens": 2311,
"latencyMs": 21706.56012499999
},
{
"questionId": "q47",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "85",
"isCorrect": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1338.67408300002
},
{
"questionId": "q47",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "91",
"actual": "91",
"isCorrect": true,
"inputTokens": 3323,
"outputTokens": 2,
"latencyMs": 12844.911791999999
},
{
"questionId": "q47",
"format": "csv",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"isCorrect": true,
"inputTokens": 2383,
"outputTokens": 2823,
"latencyMs": 16151.116582999995
},
{
"questionId": "q47",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "85",
"isCorrect": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 3041.4831669999985
},
{
"questionId": "q47",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "91",
"actual": "91",
"isCorrect": true,
"inputTokens": 3197,
"outputTokens": 2,
"latencyMs": 12006.398833000014
},
{
"questionId": "q47",
"format": "xml",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"isCorrect": true,
"inputTokens": 7359,
"outputTokens": 2695,
"latencyMs": 26044.306083000003
},
{
"questionId": "q47",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"isCorrect": false,
"inputTokens": 9360,
"outputTokens": 5,
"latencyMs": 1573.8229160000046
},
{
"questionId": "q47",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "91",
"actual": "91",
"isCorrect": true,
"inputTokens": 9103,
"outputTokens": 2,
"latencyMs": 27838.932499999995
},
{
"questionId": "q47",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"isCorrect": true,
"inputTokens": 5014,
"outputTokens": 2823,
"latencyMs": 22628.083542000008
},
{
"questionId": "q47",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"isCorrect": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1787.638666999992
},
{
"questionId": "q47",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "91",
"actual": "90",
"isCorrect": false,
"inputTokens": 5749,
"outputTokens": 2,
"latencyMs": 1343.8462499999732
},
{
"questionId": "q48",
"format": "json",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"isCorrect": true,
"inputTokens": 6392,
"outputTokens": 1479,
"latencyMs": 14420.83845900002
},
{
"questionId": "q48",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "57",
"isCorrect": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1271.2462919999962
},
{
"questionId": "q48",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "67",
"actual": "70",
"isCorrect": false,
"inputTokens": 7914,
"outputTokens": 2,
"latencyMs": 1108.4178750000137
},
{
"questionId": "q48",
"format": "toon",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"isCorrect": true,
"inputTokens": 2529,
"outputTokens": 2247,
"latencyMs": 18434.695834000013
},
{
"questionId": "q48",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "47",
"isCorrect": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1125.2875420000055
},
{
"questionId": "q48",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "67",
"actual": "60",
"isCorrect": false,
"inputTokens": 3323,
"outputTokens": 2,
"latencyMs": 13027.224332999991
},
{
"questionId": "q48",
"format": "csv",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"isCorrect": true,
"inputTokens": 2383,
"outputTokens": 2503,
"latencyMs": 23294.861958000023
},
{
"questionId": "q48",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "47",
"isCorrect": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 1208.8763340000005
},
{
"questionId": "q48",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "67",
"actual": "67",
"isCorrect": true,
"inputTokens": 3197,
"outputTokens": 2,
"latencyMs": 11604.352749999991
},
{
"questionId": "q48",
"format": "xml",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"isCorrect": true,
"inputTokens": 7359,
"outputTokens": 1479,
"latencyMs": 18504.804959
},
{
"questionId": "q48",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "57",
"isCorrect": false,
"inputTokens": 9360,
"outputTokens": 5,
"latencyMs": 1127.928917000012
},
{
"questionId": "q48",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "67",
"actual": "67",
"isCorrect": true,
"inputTokens": 9103,
"outputTokens": 2,
"latencyMs": 22629.69987500002
},
{
"questionId": "q48",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"isCorrect": true,
"inputTokens": 5014,
"outputTokens": 2631,
"latencyMs": 93677.45470900001
},
{
"questionId": "q48",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "57",
"isCorrect": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1083.3742910000146
},
{
"questionId": "q48",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "67",
"actual": "70",
"isCorrect": false,
"inputTokens": 5749,
"outputTokens": 2,
"latencyMs": 1435.5812079999887
},
{
"questionId": "q49",
"format": "json",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 6392,
"outputTokens": 1543,
"latencyMs": 14267.44858299999
},
{
"questionId": "q49",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"isCorrect": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1483.0176250000077
},
{
"questionId": "q49",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "40",
"isCorrect": false,
"inputTokens": 7915,
"outputTokens": 2,
"latencyMs": 1598.6212089999754
},
{
"questionId": "q49",
"format": "toon",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 2529,
"outputTokens": 1671,
"latencyMs": 15241.04254200001
},
{
"questionId": "q49",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "27",
"isCorrect": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1011.390458000009
},
{
"questionId": "q49",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 3324,
"outputTokens": 2,
"latencyMs": 17035.035957999993
},
{
"questionId": "q49",
"format": "csv",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 2383,
"outputTokens": 1799,
"latencyMs": 15270.303583
},
{
"questionId": "q49",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"isCorrect": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 919.8500000000058
},
{
"questionId": "q49",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 3198,
"outputTokens": 2,
"latencyMs": 9191.171333000006
},
{
"questionId": "q49",
"format": "xml",
"model": "gpt-5-nano",
"expected": "41",
"actual": "42",
"isCorrect": false,
"inputTokens": 7359,
"outputTokens": 1479,
"latencyMs": 14804.62512500002
},
{
"questionId": "q49",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"isCorrect": false,
"inputTokens": 9360,
"outputTokens": 5,
"latencyMs": 1236.6115409999911
},
{
"questionId": "q49",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 9104,
"outputTokens": 2,
"latencyMs": 19284.10699999999
},
{
"questionId": "q49",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 5014,
"outputTokens": 1863,
"latencyMs": 17259.288042
},
{
"questionId": "q49",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"isCorrect": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1715.9734999999928
},
{
"questionId": "q49",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "44",
"isCorrect": false,
"inputTokens": 5750,
"outputTokens": 2,
"latencyMs": 1872.7845830000006
},
{
"questionId": "q50",
"format": "json",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"isCorrect": true,
"inputTokens": 6392,
"outputTokens": 1543,
"latencyMs": 15919.779666999995
},
{
"questionId": "q50",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "20",
"isCorrect": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1291.8912500000151
},
{
"questionId": "q50",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "26",
"actual": "24",
"isCorrect": false,
"inputTokens": 7915,
"outputTokens": 2,
"latencyMs": 1005.6952080000192
},
{
"questionId": "q50",
"format": "toon",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"isCorrect": true,
"inputTokens": 2529,
"outputTokens": 1287,
"latencyMs": 30941.076040999993
},
{
"questionId": "q50",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "16",
"isCorrect": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1114.022666999983
},
{
"questionId": "q50",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "26",
"actual": "26",
"isCorrect": true,
"inputTokens": 3324,
"outputTokens": 2,
"latencyMs": 17484.997459000006
},
{
"questionId": "q50",
"format": "csv",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"isCorrect": true,
"inputTokens": 2383,
"outputTokens": 1735,
"latencyMs": 16410.497957999993
},
{
"questionId": "q50",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "16",
"isCorrect": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 1096.8193330000213
},
{
"questionId": "q50",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "26",
"actual": "26",
"isCorrect": true,
"inputTokens": 3198,
"outputTokens": 2,
"latencyMs": 14324.279708000016
},
{
"questionId": "q50",
"format": "xml",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"isCorrect": true,
"inputTokens": 7359,
"outputTokens": 1543,
"latencyMs": 15139.200333999994
},
{
"questionId": "q50",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "21",
"isCorrect": false,
"inputTokens": 9360,
"outputTokens": 5,
"latencyMs": 1152.736042000004
},
{
"questionId": "q50",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "26",
"actual": "26",
"isCorrect": true,
"inputTokens": 9104,
"outputTokens": 2,
"latencyMs": 19624.726874999993
},
{
"questionId": "q50",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"isCorrect": true,
"inputTokens": 5014,
"outputTokens": 1031,
"latencyMs": 7884.299167000019
},
{
"questionId": "q50",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "20",
"isCorrect": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 984.3461250000109
},
{
"questionId": "q50",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "26",
"actual": "30",
"isCorrect": false,
"inputTokens": 5750,
"outputTokens": 2,
"latencyMs": 1294.497417000006
},
{
"questionId": "q51",
"format": "json",
"model": "gpt-5-nano",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 6386,
"outputTokens": 2695,
"latencyMs": 25757.74325
},
{
"questionId": "q51",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "81",
"isCorrect": false,
"inputTokens": 7864,
"outputTokens": 5,
"latencyMs": 1330.1275409999944
},
{
"questionId": "q51",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 7905,
"outputTokens": 2,
"latencyMs": 11349.042874999985
},
{
"questionId": "q51",
"format": "toon",
"model": "gpt-5-nano",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 2523,
"outputTokens": 2119,
"latencyMs": 31391.252624999994
},
{
"questionId": "q51",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 2976,
"outputTokens": 5,
"latencyMs": 1051.2665419999976
},
{
"questionId": "q51",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 3314,
"outputTokens": 2,
"latencyMs": 9630.083915999974
},
{
"questionId": "q51",
"format": "csv",
"model": "gpt-5-nano",
"expected": "78",
"actual": "84",
"isCorrect": false,
"inputTokens": 2377,
"outputTokens": 1863,
"latencyMs": 15133.794208000007
},
{
"questionId": "q51",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "73",
"isCorrect": false,
"inputTokens": 2850,
"outputTokens": 5,
"latencyMs": 952.5605000000214
},
{
"questionId": "q51",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 3188,
"outputTokens": 2,
"latencyMs": 11450.481040999992
},
{
"questionId": "q51",
"format": "xml",
"model": "gpt-5-nano",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 7353,
"outputTokens": 903,
"latencyMs": 32111.97775000002
},
{
"questionId": "q51",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "77",
"isCorrect": false,
"inputTokens": 9354,
"outputTokens": 5,
"latencyMs": 2015.6932080000115
},
{
"questionId": "q51",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 9094,
"outputTokens": 2,
"latencyMs": 11316.587916999997
},
{
"questionId": "q51",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 5008,
"outputTokens": 1607,
"latencyMs": 17228.22670900001
},
{
"questionId": "q51",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "77",
"isCorrect": false,
"inputTokens": 5754,
"outputTokens": 5,
"latencyMs": 1434.8912919999857
},
{
"questionId": "q51",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "78",
"actual": "78",
"isCorrect": true,
"inputTokens": 5740,
"outputTokens": 2,
"latencyMs": 15144.007791000011
},
{
"questionId": "q52",
"format": "json",
"model": "gpt-5-nano",
"expected": "22",
"actual": "21",
"isCorrect": false,
"inputTokens": 6386,
"outputTokens": 839,
"latencyMs": 8969.827833999996
},
{
"questionId": "q52",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "15",
"isCorrect": false,
"inputTokens": 7864,
"outputTokens": 5,
"latencyMs": 1038.1520420000015
},
{
"questionId": "q52",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "22",
"actual": "22",
"isCorrect": true,
"inputTokens": 7905,
"outputTokens": 2,
"latencyMs": 8416.65183399999
},
{
"questionId": "q52",
"format": "toon",
"model": "gpt-5-nano",
"expected": "22",
"actual": "22",
"isCorrect": true,
"inputTokens": 2523,
"outputTokens": 967,
"latencyMs": 9633.799374999973
},
{
"questionId": "q52",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "16",
"isCorrect": false,
"inputTokens": 2976,
"outputTokens": 5,
"latencyMs": 1134.1007079999836
},
{
"questionId": "q52",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "22",
"actual": "22",
"isCorrect": true,
"inputTokens": 3314,
"outputTokens": 2,
"latencyMs": 11542.581249999988
},
{
"questionId": "q52",
"format": "csv",
"model": "gpt-5-nano",
"expected": "22",
"actual": "24",
"isCorrect": false,
"inputTokens": 2377,
"outputTokens": 2695,
"latencyMs": 41106.853249999986
},
{
"questionId": "q52",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "20",
"isCorrect": false,
"inputTokens": 2850,
"outputTokens": 5,
"latencyMs": 918.981958999997
},
{
"questionId": "q52",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "22",
"actual": "22",
"isCorrect": true,
"inputTokens": 3188,
"outputTokens": 2,
"latencyMs": 2052.5287920000264
},
{
"questionId": "q52",
"format": "xml",
"model": "gpt-5-nano",
"expected": "22",
"actual": "22",
"isCorrect": true,
"inputTokens": 7353,
"outputTokens": 839,
"latencyMs": 8334.775790999993
},
{
"questionId": "q52",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "15",
"isCorrect": false,
"inputTokens": 9354,
"outputTokens": 5,
"latencyMs": 949.7613340000098
},
{
"questionId": "q52",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "22",
"actual": "22",
"isCorrect": true,
"inputTokens": 9094,
"outputTokens": 2,
"latencyMs": 10658.192250000022
},
{
"questionId": "q52",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "22",
"actual": "22",
"isCorrect": true,
"inputTokens": 5008,
"outputTokens": 1991,
"latencyMs": 14355.515540999972
},
{
"questionId": "q52",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "16",
"isCorrect": false,
"inputTokens": 5754,
"outputTokens": 5,
"latencyMs": 1039.7822079999896
},
{
"questionId": "q52",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "22",
"actual": "22",
"isCorrect": true,
"inputTokens": 5740,
"outputTokens": 2,
"latencyMs": 12535.245041999995
},
{
"questionId": "q53",
"format": "json",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 6394,
"outputTokens": 1223,
"latencyMs": 11632.450709000026
},
{
"questionId": "q53",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "9",
"isCorrect": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1179.524166999996
},
{
"questionId": "q53",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 7916,
"outputTokens": 2,
"latencyMs": 4426.7412919999915
},
{
"questionId": "q53",
"format": "toon",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 2531,
"outputTokens": 1799,
"latencyMs": 21729.542084000015
},
{
"questionId": "q53",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "9",
"isCorrect": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 3320.943874999997
},
{
"questionId": "q53",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 3325,
"outputTokens": 2,
"latencyMs": 5572.28795800003
},
{
"questionId": "q53",
"format": "csv",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 2385,
"outputTokens": 1479,
"latencyMs": 23517.660458
},
{
"questionId": "q53",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "10",
"isCorrect": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1028.1668340000033
},
{
"questionId": "q53",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "13",
"isCorrect": false,
"inputTokens": 3199,
"outputTokens": 2,
"latencyMs": 21513.301958999975
},
{
"questionId": "q53",
"format": "xml",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 7361,
"outputTokens": 1415,
"latencyMs": 25169.729082999984
},
{
"questionId": "q53",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "11",
"isCorrect": false,
"inputTokens": 9362,
"outputTokens": 5,
"latencyMs": 1306.0004590000026
},
{
"questionId": "q53",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 9105,
"outputTokens": 2,
"latencyMs": 22791.16737499999
},
{
"questionId": "q53",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 5016,
"outputTokens": 1415,
"latencyMs": 18191.111124999996
},
{
"questionId": "q53",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "10",
"isCorrect": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 927.1151660000323
},
{
"questionId": "q53",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "13",
"isCorrect": false,
"inputTokens": 5751,
"outputTokens": 2,
"latencyMs": 5849.65625
},
{
"questionId": "q54",
"format": "json",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 6394,
"outputTokens": 1543,
"latencyMs": 17624.57283399999
},
{
"questionId": "q54",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "7",
"isCorrect": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1445.3690829999978
},
{
"questionId": "q54",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 7916,
"outputTokens": 2,
"latencyMs": 4641.89829099999
},
{
"questionId": "q54",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 2531,
"outputTokens": 1095,
"latencyMs": 16408.578749999986
},
{
"questionId": "q54",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "6",
"isCorrect": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1336.712916999997
},
{
"questionId": "q54",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 3325,
"outputTokens": 2,
"latencyMs": 5775.600584
},
{
"questionId": "q54",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 2385,
"outputTokens": 1479,
"latencyMs": 15717.845583999995
},
{
"questionId": "q54",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"isCorrect": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 2198.0668749999604
},
{
"questionId": "q54",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 3199,
"outputTokens": 2,
"latencyMs": 37479.52691700001
},
{
"questionId": "q54",
"format": "xml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 7361,
"outputTokens": 1095,
"latencyMs": 10663.58587499999
},
{
"questionId": "q54",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"isCorrect": false,
"inputTokens": 9362,
"outputTokens": 5,
"latencyMs": 1077.469374999986
},
{
"questionId": "q54",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 9105,
"outputTokens": 2,
"latencyMs": 16569.429416999978
},
{
"questionId": "q54",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 5016,
"outputTokens": 1415,
"latencyMs": 15212.04125000001
},
{
"questionId": "q54",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"isCorrect": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 935.8371249999618
},
{
"questionId": "q54",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "10",
"isCorrect": false,
"inputTokens": 5751,
"outputTokens": 2,
"latencyMs": 5121.037708000047
},
{
"questionId": "q55",
"format": "json",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 6394,
"outputTokens": 1095,
"latencyMs": 34446.65704199998
},
{
"questionId": "q55",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"isCorrect": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 2282.8374170000316
},
{
"questionId": "q55",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 7916,
"outputTokens": 2,
"latencyMs": 5432.8123749999795
},
{
"questionId": "q55",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 2531,
"outputTokens": 1479,
"latencyMs": 42719.131124999956
},
{
"questionId": "q55",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "7",
"isCorrect": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1832.9572909999988
},
{
"questionId": "q55",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 3325,
"outputTokens": 2,
"latencyMs": 7711.211624999996
},
{
"questionId": "q55",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 2385,
"outputTokens": 1607,
"latencyMs": 57515.48358300002
},
{
"questionId": "q55",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"isCorrect": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 3238.0369170000195
},
{
"questionId": "q55",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 3199,
"outputTokens": 2,
"latencyMs": 9271.402125000022
},
{
"questionId": "q55",
"format": "xml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 7361,
"outputTokens": 967,
"latencyMs": 12946.014833999972
},
{
"questionId": "q55",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "9",
"isCorrect": false,
"inputTokens": 9362,
"outputTokens": 5,
"latencyMs": 1523.2371250000433
},
{
"questionId": "q55",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 9105,
"outputTokens": 2,
"latencyMs": 11301.93191600003
},
{
"questionId": "q55",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 5016,
"outputTokens": 1351,
"latencyMs": 18129.383040999994
},
{
"questionId": "q55",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "9",
"isCorrect": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1117.6802920000046
},
{
"questionId": "q55",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 5751,
"outputTokens": 2,
"latencyMs": 4743.260083000001
},
{
"questionId": "q56",
"format": "json",
"model": "gpt-5-nano",
"expected": "12",
"actual": "11",
"isCorrect": false,
"inputTokens": 6394,
"outputTokens": 1479,
"latencyMs": 12632.222667000024
},
{
"questionId": "q56",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "7",
"isCorrect": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1567.1472920000087
},
{
"questionId": "q56",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 7916,
"outputTokens": 2,
"latencyMs": 5749.258750000037
},
{
"questionId": "q56",
"format": "toon",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 2531,
"outputTokens": 1479,
"latencyMs": 17473.24116700003
},
{
"questionId": "q56",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "6",
"isCorrect": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 922.2049170000246
},
{
"questionId": "q56",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 3325,
"outputTokens": 2,
"latencyMs": 5561.690833000001
},
{
"questionId": "q56",
"format": "csv",
"model": "gpt-5-nano",
"expected": "12",
"actual": "11",
"isCorrect": false,
"inputTokens": 2385,
"outputTokens": 2183,
"latencyMs": 23539.67433399998
},
{
"questionId": "q56",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "7",
"isCorrect": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1159.2557500000112
},
{
"questionId": "q56",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 3199,
"outputTokens": 2,
"latencyMs": 9863.856417000003
},
{
"questionId": "q56",
"format": "xml",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 7361,
"outputTokens": 1927,
"latencyMs": 106756.24308399996
},
{
"questionId": "q56",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "8",
"isCorrect": false,
"inputTokens": 9362,
"outputTokens": 5,
"latencyMs": 1064.2161659999983
},
{
"questionId": "q56",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 9105,
"outputTokens": 2,
"latencyMs": 7033.105833999987
},
{
"questionId": "q56",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 5016,
"outputTokens": 1095,
"latencyMs": 14048.506916999992
},
{
"questionId": "q56",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "8",
"isCorrect": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1192.642125000013
},
{
"questionId": "q56",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "12",
"actual": "12",
"isCorrect": true,
"inputTokens": 5751,
"outputTokens": 2,
"latencyMs": 5957.613042000041
},
{
"questionId": "q57",
"format": "json",
"model": "gpt-5-nano",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 6393,
"outputTokens": 3719,
"latencyMs": 332341.88812499994
},
{
"questionId": "q57",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1168.1113340000156
},
{
"questionId": "q57",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 7912,
"outputTokens": 2,
"latencyMs": 20747.95541699999
},
{
"questionId": "q57",
"format": "toon",
"model": "gpt-5-nano",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 2530,
"outputTokens": 3079,
"latencyMs": 24893.890125000034
},
{
"questionId": "q57",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1446.5637920000008
},
{
"questionId": "q57",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 3321,
"outputTokens": 2,
"latencyMs": 18187.491625000024
},
{
"questionId": "q57",
"format": "csv",
"model": "gpt-5-nano",
"expected": "62",
"actual": "64",
"isCorrect": false,
"inputTokens": 2384,
"outputTokens": 4551,
"latencyMs": 61990.75604200002
},
{
"questionId": "q57",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 2368.5950840000296
},
{
"questionId": "q57",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 3195,
"outputTokens": 2,
"latencyMs": 19295.422582999978
},
{
"questionId": "q57",
"format": "xml",
"model": "gpt-5-nano",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 7360,
"outputTokens": 3015,
"latencyMs": 27433.851124999986
},
{
"questionId": "q57",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 9362,
"outputTokens": 5,
"latencyMs": 1239.7937919999822
},
{
"questionId": "q57",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 9101,
"outputTokens": 2,
"latencyMs": 21703.45670800004
},
{
"questionId": "q57",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 5015,
"outputTokens": 4615,
"latencyMs": 38416.754041999986
},
{
"questionId": "q57",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 974.5636659999727
},
{
"questionId": "q57",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "62",
"actual": "62",
"isCorrect": true,
"inputTokens": 5747,
"outputTokens": 2,
"latencyMs": 20388.102249999996
},
{
"questionId": "q58",
"format": "json",
"model": "gpt-5-nano",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 6393,
"outputTokens": 2567,
"latencyMs": 23536.014041999995
},
{
"questionId": "q58",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"isCorrect": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1002.8562090000487
},
{
"questionId": "q58",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 7913,
"outputTokens": 2,
"latencyMs": 35012.274959
},
{
"questionId": "q58",
"format": "toon",
"model": "gpt-5-nano",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 2530,
"outputTokens": 3143,
"latencyMs": 27182.416041999997
},
{
"questionId": "q58",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"isCorrect": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 935.4336250000051
},
{
"questionId": "q58",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 3322,
"outputTokens": 2,
"latencyMs": 19937.21420799999
},
{
"questionId": "q58",
"format": "csv",
"model": "gpt-5-nano",
"expected": "45",
"actual": "46",
"isCorrect": false,
"inputTokens": 2384,
"outputTokens": 3271,
"latencyMs": 26153.538457999995
},
{
"questionId": "q58",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"isCorrect": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1029.4126660000184
},
{
"questionId": "q58",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 3196,
"outputTokens": 2,
"latencyMs": 36182.66629199998
},
{
"questionId": "q58",
"format": "xml",
"model": "gpt-5-nano",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 7360,
"outputTokens": 2823,
"latencyMs": 27939.341790999984
},
{
"questionId": "q58",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "47",
"isCorrect": false,
"inputTokens": 9362,
"outputTokens": 5,
"latencyMs": 1699.4091669999762
},
{
"questionId": "q58",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 9102,
"outputTokens": 2,
"latencyMs": 20119.059750000015
},
{
"questionId": "q58",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 5015,
"outputTokens": 2631,
"latencyMs": 25962.383333999955
},
{
"questionId": "q58",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "38",
"isCorrect": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1063.877124999999
},
{
"questionId": "q58",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "45",
"actual": "45",
"isCorrect": true,
"inputTokens": 5748,
"outputTokens": 2,
"latencyMs": 37951.156874999986
},
{
"questionId": "q59",
"format": "json",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 137,
"latencyMs": 2635.883374999976
},
{
"questionId": "q59",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1164.0292079999927
},
{
"questionId": "q59",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 5,
"latencyMs": 1510.9628750000265
},
{
"questionId": "q59",
"format": "toon",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 73,
"latencyMs": 3338.3452919999836
},
{
"questionId": "q59",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1290.2898750000168
},
{
"questionId": "q59",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 5,
"latencyMs": 1073.7947919999715
},
{
"questionId": "q59",
"format": "csv",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 201,
"latencyMs": 3254.3114590000478
},
{
"questionId": "q59",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1300.0598330000066
},
{
"questionId": "q59",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 5,
"latencyMs": 2603.532125000027
},
{
"questionId": "q59",
"format": "xml",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 137,
"latencyMs": 2712.822291999997
},
{
"questionId": "q59",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1369.1374160000123
},
{
"questionId": "q59",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 5,
"latencyMs": 1339.450165999995
},
{
"questionId": "q59",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 137,
"latencyMs": 2561.059583000024
},
{
"questionId": "q59",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1122.8535000000265
},
{
"questionId": "q59",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "96.17",
"actual": "96.17",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 5,
"latencyMs": 1243.387041000009
},
{
"questionId": "q60",
"format": "json",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 200,
"latencyMs": 4276.413916999998
},
{
"questionId": "q60",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1337.8417079999927
},
{
"questionId": "q60",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 2,
"latencyMs": 1526.3712500000256
},
{
"questionId": "q60",
"format": "toon",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 136,
"latencyMs": 2210.3001669999794
},
{
"questionId": "q60",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1227.2460840000422
},
{
"questionId": "q60",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 2,
"latencyMs": 1149.5532499999972
},
{
"questionId": "q60",
"format": "csv",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 200,
"latencyMs": 2463.5065419999883
},
{
"questionId": "q60",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1474.229833999998
},
{
"questionId": "q60",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 2,
"latencyMs": 3119.7202080000425
},
{
"questionId": "q60",
"format": "xml",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 136,
"latencyMs": 2996.8577500000247
},
{
"questionId": "q60",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1374.8893749999697
},
{
"questionId": "q60",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 2,
"latencyMs": 1361.1552500000107
},
{
"questionId": "q60",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 136,
"latencyMs": 2356.033334000036
},
{
"questionId": "q60",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1128.8600410000072
},
{
"questionId": "q60",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 2,
"latencyMs": 1012.1753329999628
},
{
"questionId": "q61",
"format": "json",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 201,
"latencyMs": 2894.6042920000036
},
{
"questionId": "q61",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1140.3883749999804
},
{
"questionId": "q61",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 6,
"latencyMs": 1286.3832499999553
},
{
"questionId": "q61",
"format": "toon",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 201,
"latencyMs": 5983.418707999983
},
{
"questionId": "q61",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1257.5179999999818
},
{
"questionId": "q61",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 6,
"latencyMs": 1470.9667500000214
},
{
"questionId": "q61",
"format": "csv",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 265,
"latencyMs": 3804.386666000006
},
{
"questionId": "q61",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1181.0549580000225
},
{
"questionId": "q61",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 6,
"latencyMs": 2825.75008300005
},
{
"questionId": "q61",
"format": "xml",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 201,
"latencyMs": 4155.127124999999
},
{
"questionId": "q61",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1243.845667000045
},
{
"questionId": "q61",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 6,
"latencyMs": 1183.5630419999943
},
{
"questionId": "q61",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 137,
"latencyMs": 3305.4360420000157
},
{
"questionId": "q61",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1122.905792000005
},
{
"questionId": "q61",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "599.39",
"actual": "599.39",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 6,
"latencyMs": 1289.1040829999838
},
{
"questionId": "q62",
"format": "json",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 199,
"latencyMs": 4459.190540999989
},
{
"questionId": "q62",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1385.2943749999977
},
{
"questionId": "q62",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 1,
"latencyMs": 1281.1537499999977
},
{
"questionId": "q62",
"format": "toon",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 135,
"latencyMs": 2211.059750000015
},
{
"questionId": "q62",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1282.652208000014
},
{
"questionId": "q62",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 1,
"latencyMs": 1296.6791250000242
},
{
"questionId": "q62",
"format": "csv",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 135,
"latencyMs": 4460.896583999973
},
{
"questionId": "q62",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1311.2437919999938
},
{
"questionId": "q62",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 1,
"latencyMs": 2321.0788329999777
},
{
"questionId": "q62",
"format": "xml",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 135,
"latencyMs": 2574.011124999961
},
{
"questionId": "q62",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1331.6849169999477
},
{
"questionId": "q62",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 1,
"latencyMs": 1876.967500000028
},
{
"questionId": "q62",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 71,
"latencyMs": 4585.356583999994
},
{
"questionId": "q62",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1472.130541999999
},
{
"questionId": "q62",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 1,
"latencyMs": 3066.8415830000304
},
{
"questionId": "q63",
"format": "json",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 265,
"latencyMs": 4022.9598750000005
},
{
"questionId": "q63",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1480.8643750000047
},
{
"questionId": "q63",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 6,
"latencyMs": 1615.6131670000032
},
{
"questionId": "q63",
"format": "toon",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 265,
"latencyMs": 3674.1392500000075
},
{
"questionId": "q63",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1060.8583750000107
},
{
"questionId": "q63",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 6,
"latencyMs": 1496.0798749999958
},
{
"questionId": "q63",
"format": "csv",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 329,
"latencyMs": 3936.86050000001
},
{
"questionId": "q63",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1451.5014170000213
},
{
"questionId": "q63",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 6,
"latencyMs": 3275.3027920000022
},
{
"questionId": "q63",
"format": "xml",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 521,
"latencyMs": 7834.65945799998
},
{
"questionId": "q63",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1066.7734170000185
},
{
"questionId": "q63",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 6,
"latencyMs": 1091.2406670000055
},
{
"questionId": "q63",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 265,
"latencyMs": 7133.230082999973
},
{
"questionId": "q63",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1334.3640829999931
},
{
"questionId": "q63",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "528.71",
"actual": "528.71",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 6,
"latencyMs": 1548.7799590000068
},
{
"questionId": "q64",
"format": "json",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 199,
"latencyMs": 3084.847666000016
},
{
"questionId": "q64",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1400.1154589999933
},
{
"questionId": "q64",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 1,
"latencyMs": 2145.6674999999814
},
{
"questionId": "q64",
"format": "toon",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 199,
"latencyMs": 2951.514334000007
},
{
"questionId": "q64",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1178.9784170000348
},
{
"questionId": "q64",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 1,
"latencyMs": 1061.4745419999817
},
{
"questionId": "q64",
"format": "csv",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 263,
"latencyMs": 3550.5126670000027
},
{
"questionId": "q64",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1128.6832500000019
},
{
"questionId": "q64",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 1,
"latencyMs": 2419.836874999979
},
{
"questionId": "q64",
"format": "xml",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 263,
"latencyMs": 18500.49987499998
},
{
"questionId": "q64",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1697.067417000013
},
{
"questionId": "q64",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 1,
"latencyMs": 1665.4901669999817
},
{
"questionId": "q64",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 135,
"latencyMs": 3648.2167090000003
},
{
"questionId": "q64",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1223.7409169999883
},
{
"questionId": "q64",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 1,
"latencyMs": 2938.2844999999506
},
{
"questionId": "q65",
"format": "json",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 202,
"latencyMs": 3459.946917000052
},
{
"questionId": "q65",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1173.402208000014
},
{
"questionId": "q65",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 7,
"latencyMs": 3167.1566250000033
},
{
"questionId": "q65",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 202,
"latencyMs": 3737.224749999994
},
{
"questionId": "q65",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 926.1720830000122
},
{
"questionId": "q65",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 7,
"latencyMs": 1469.4704999999958
},
{
"questionId": "q65",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 266,
"latencyMs": 4014.4818339999765
},
{
"questionId": "q65",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1132.7197079999605
},
{
"questionId": "q65",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 7,
"latencyMs": 3670.1206250000396
},
{
"questionId": "q65",
"format": "xml",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 202,
"latencyMs": 4318.927583000041
},
{
"questionId": "q65",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 8,
"latencyMs": 1835.1892919999664
},
{
"questionId": "q65",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 7,
"latencyMs": 1211.4787500000093
},
{
"questionId": "q65",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 202,
"latencyMs": 3591.6950419999775
},
{
"questionId": "q65",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1278.8472920000204
},
{
"questionId": "q65",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "1687.82",
"actual": "1687.82",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 7,
"latencyMs": 2102.123208999983
},
{
"questionId": "q66",
"format": "json",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 136,
"latencyMs": 2793.1591250000056
},
{
"questionId": "q66",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1319.3459579999908
},
{
"questionId": "q66",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 1,
"latencyMs": 1572.3595830000122
},
{
"questionId": "q66",
"format": "toon",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 264,
"latencyMs": 4642.070207999961
},
{
"questionId": "q66",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1161.8217919999734
},
{
"questionId": "q66",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 1,
"latencyMs": 1045.6249589999788
},
{
"questionId": "q66",
"format": "csv",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 200,
"latencyMs": 3501.1775419999612
},
{
"questionId": "q66",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1463.0212910000118
},
{
"questionId": "q66",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 1,
"latencyMs": 1782.100999999966
},
{
"questionId": "q66",
"format": "xml",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 584,
"latencyMs": 7168.528500000015
},
{
"questionId": "q66",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1339.9878749999916
},
{
"questionId": "q66",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 1,
"latencyMs": 1196.7808749999967
},
{
"questionId": "q66",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 328,
"latencyMs": 4938.96991699998
},
{
"questionId": "q66",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1121.6232500000042
},
{
"questionId": "q66",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 1,
"latencyMs": 1062.6134160000365
},
{
"questionId": "q67",
"format": "json",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 137,
"latencyMs": 2332.1545840000035
},
{
"questionId": "q67",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1210.105333000014
},
{
"questionId": "q67",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 5,
"latencyMs": 2248.713915999979
},
{
"questionId": "q67",
"format": "toon",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 201,
"latencyMs": 5095.391790999973
},
{
"questionId": "q67",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 2002.2553749999497
},
{
"questionId": "q67",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 5,
"latencyMs": 1447.1179159999592
},
{
"questionId": "q67",
"format": "csv",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 201,
"latencyMs": 7838.877333000011
},
{
"questionId": "q67",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1108.0410839999677
},
{
"questionId": "q67",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 5,
"latencyMs": 2419.8735420000157
},
{
"questionId": "q67",
"format": "xml",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 201,
"latencyMs": 4098.654000000039
},
{
"questionId": "q67",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1200.5831250000047
},
{
"questionId": "q67",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 5,
"latencyMs": 1685.785542000027
},
{
"questionId": "q67",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 201,
"latencyMs": 4059.9044170000125
},
{
"questionId": "q67",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1264.0358329999726
},
{
"questionId": "q67",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "423.6",
"actual": "423.6",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 5,
"latencyMs": 1237.0989580000169
},
{
"questionId": "q68",
"format": "json",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 200,
"latencyMs": 3303.1327499999898
},
{
"questionId": "q68",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1808.5881250000093
},
{
"questionId": "q68",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 1,
"latencyMs": 1355.4241669999901
},
{
"questionId": "q68",
"format": "toon",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 200,
"latencyMs": 3711.711249999993
},
{
"questionId": "q68",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1294.2883750000037
},
{
"questionId": "q68",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 1,
"latencyMs": 1162.5020840000361
},
{
"questionId": "q68",
"format": "csv",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 264,
"latencyMs": 3022.083249999967
},
{
"questionId": "q68",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 944.2437079999945
},
{
"questionId": "q68",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 1,
"latencyMs": 3629.1201669999864
},
{
"questionId": "q68",
"format": "xml",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 456,
"latencyMs": 4701.368916000007
},
{
"questionId": "q68",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1121.0914999999804
},
{
"questionId": "q68",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 1,
"latencyMs": 2000.4341669999994
},
{
"questionId": "q68",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 200,
"latencyMs": 6000.394582999987
},
{
"questionId": "q68",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1584.1092090000166
},
{
"questionId": "q68",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 1,
"latencyMs": 2002.2350420000148
},
{
"questionId": "q69",
"format": "json",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 265,
"latencyMs": 7792.974290999991
},
{
"questionId": "q69",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 2028.2800829999615
},
{
"questionId": "q69",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 6,
"latencyMs": 1505.0516669999924
},
{
"questionId": "q69",
"format": "toon",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 201,
"latencyMs": 7270.891041999974
},
{
"questionId": "q69",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 2478.4481660000165
},
{
"questionId": "q69",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 6,
"latencyMs": 1305.2497500000172
},
{
"questionId": "q69",
"format": "csv",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 393,
"latencyMs": 6261.073583999998
},
{
"questionId": "q69",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1863.528500000015
},
{
"questionId": "q69",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 6,
"latencyMs": 3306.4452499999898
},
{
"questionId": "q69",
"format": "xml",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 265,
"latencyMs": 3464.767792000028
},
{
"questionId": "q69",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1144.0890420000069
},
{
"questionId": "q69",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 6,
"latencyMs": 1458.4538750000065
},
{
"questionId": "q69",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 201,
"latencyMs": 3276.8598340000026
},
{
"questionId": "q69",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1434.8686669999734
},
{
"questionId": "q69",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "784.03",
"actual": "784.03",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 6,
"latencyMs": 1570.2152500000084
},
{
"questionId": "q70",
"format": "json",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 200,
"latencyMs": 3532.8103330000304
},
{
"questionId": "q70",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1212.3070409999928
},
{
"questionId": "q70",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 2,
"latencyMs": 1246.4002080000355
},
{
"questionId": "q70",
"format": "toon",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 136,
"latencyMs": 6942.459582999989
},
{
"questionId": "q70",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1144.068333000003
},
{
"questionId": "q70",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 2,
"latencyMs": 2209.296417000005
},
{
"questionId": "q70",
"format": "csv",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 136,
"latencyMs": 4940.5221670000465
},
{
"questionId": "q70",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1493.192041000002
},
{
"questionId": "q70",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 2,
"latencyMs": 1817.8049579999642
},
{
"questionId": "q70",
"format": "xml",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 136,
"latencyMs": 3458.8650829999824
},
{
"questionId": "q70",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1401.621165999968
},
{
"questionId": "q70",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 2,
"latencyMs": 3644.271166999999
},
{
"questionId": "q70",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 200,
"latencyMs": 2859.7807909999974
},
{
"questionId": "q70",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1170.455874999985
},
{
"questionId": "q70",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "shipped",
"actual": "shipped",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 2,
"latencyMs": 2668.4208750000107
},
{
"questionId": "q71",
"format": "json",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 265,
"latencyMs": 3387.9897919999785
},
{
"questionId": "q71",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1210.6735000000335
},
{
"questionId": "q71",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 6,
"latencyMs": 2313.2734579999815
},
{
"questionId": "q71",
"format": "toon",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 201,
"latencyMs": 2948.030916000018
},
{
"questionId": "q71",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1499.2446670000209
},
{
"questionId": "q71",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 6,
"latencyMs": 1259.240832999989
},
{
"questionId": "q71",
"format": "csv",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 201,
"latencyMs": 8963.050458999991
},
{
"questionId": "q71",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1168.6370839999872
},
{
"questionId": "q71",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 6,
"latencyMs": 2633.771375000011
},
{
"questionId": "q71",
"format": "xml",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 329,
"latencyMs": 7189.561790999956
},
{
"questionId": "q71",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1225.8507080000127
},
{
"questionId": "q71",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 6,
"latencyMs": 1124.1396250000107
},
{
"questionId": "q71",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 201,
"latencyMs": 3990.592707999982
},
{
"questionId": "q71",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1128.0700419999775
},
{
"questionId": "q71",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "645.88",
"actual": "645.88",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 6,
"latencyMs": 1804.0158330000122
},
{
"questionId": "q72",
"format": "json",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 263,
"latencyMs": 3661.423624999996
},
{
"questionId": "q72",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1125.6147919999785
},
{
"questionId": "q72",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 1,
"latencyMs": 1711.6630829999922
},
{
"questionId": "q72",
"format": "toon",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 199,
"latencyMs": 3128.0557079999708
},
{
"questionId": "q72",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1669.1822079999838
},
{
"questionId": "q72",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 1,
"latencyMs": 1274.667958999984
},
{
"questionId": "q72",
"format": "csv",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 263,
"latencyMs": 3663.237792
},
{
"questionId": "q72",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1122.126249999972
},
{
"questionId": "q72",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 1,
"latencyMs": 1549.8010420000064
},
{
"questionId": "q72",
"format": "xml",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 327,
"latencyMs": 6674.916083000018
},
{
"questionId": "q72",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1230.8339169999817
},
{
"questionId": "q72",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 1,
"latencyMs": 992.4760409999872
},
{
"questionId": "q72",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 199,
"latencyMs": 3755.6932919999817
},
{
"questionId": "q72",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1540.152833
},
{
"questionId": "q72",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "processing",
"actual": "processing",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 1,
"latencyMs": 2185.4502910000156
},
{
"questionId": "q73",
"format": "json",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 265,
"latencyMs": 3809.869667000021
},
{
"questionId": "q73",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1150.84375
},
{
"questionId": "q73",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 6,
"latencyMs": 1217.3986659999937
},
{
"questionId": "q73",
"format": "toon",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 137,
"latencyMs": 2091.0124589999905
},
{
"questionId": "q73",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1357.4467920000316
},
{
"questionId": "q73",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 6,
"latencyMs": 2377.229250000033
},
{
"questionId": "q73",
"format": "csv",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 201,
"latencyMs": 2673.4793749999953
},
{
"questionId": "q73",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1785.7454999999609
},
{
"questionId": "q73",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 6,
"latencyMs": 1956.5365410000086
},
{
"questionId": "q73",
"format": "xml",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 201,
"latencyMs": 2943.3867910000263
},
{
"questionId": "q73",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1264.3261250000214
},
{
"questionId": "q73",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 6,
"latencyMs": 1479.502083999978
},
{
"questionId": "q73",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 137,
"latencyMs": 2697.696667000011
},
{
"questionId": "q73",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1319.8920829999843
},
{
"questionId": "q73",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "371.91",
"actual": "371.91",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 6,
"latencyMs": 1655.4022090000217
},
{
"questionId": "q74",
"format": "json",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 327,
"latencyMs": 3728.9863749999786
},
{
"questionId": "q74",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1403.8238750000019
},
{
"questionId": "q74",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 1,
"latencyMs": 1610.8924579999875
},
{
"questionId": "q74",
"format": "toon",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 199,
"latencyMs": 3121.718416000018
},
{
"questionId": "q74",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1051.426999999967
},
{
"questionId": "q74",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 1,
"latencyMs": 1171.1483340000268
},
{
"questionId": "q74",
"format": "csv",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 263,
"latencyMs": 2642.1894589999574
},
{
"questionId": "q74",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1286.3537080000388
},
{
"questionId": "q74",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 1,
"latencyMs": 3901.2503750000033
},
{
"questionId": "q74",
"format": "xml",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 263,
"latencyMs": 3386.3902919999673
},
{
"questionId": "q74",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1593.6848750000354
},
{
"questionId": "q74",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 1,
"latencyMs": 1085.9149159999797
},
{
"questionId": "q74",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 135,
"latencyMs": 2352.2881669999915
},
{
"questionId": "q74",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1046.4814580000238
},
{
"questionId": "q74",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "pending",
"actual": "pending",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 1,
"latencyMs": 1687.5740409999853
},
{
"questionId": "q75",
"format": "json",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 264,
"latencyMs": 5460.0885409999755
},
{
"questionId": "q75",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 6,
"latencyMs": 1246.0814159999718
},
{
"questionId": "q75",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 4,
"latencyMs": 1696.832666000002
},
{
"questionId": "q75",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 200,
"latencyMs": 2906.3054160000174
},
{
"questionId": "q75",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 6,
"latencyMs": 1201.3947090000147
},
{
"questionId": "q75",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "1066",
"actual": "1066.00",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 7,
"latencyMs": 1377.305457999988
},
{
"questionId": "q75",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 456,
"latencyMs": 8801.27112499997
},
{
"questionId": "q75",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 6,
"latencyMs": 1433.466666000022
},
{
"questionId": "q75",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 4,
"latencyMs": 3448.654917000036
},
{
"questionId": "q75",
"format": "xml",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 264,
"latencyMs": 4939.312791000004
},
{
"questionId": "q75",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 6,
"latencyMs": 1252.419332999969
},
{
"questionId": "q75",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "1066",
"actual": "1066.00",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 7,
"latencyMs": 1151.2592920000316
},
{
"questionId": "q75",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 136,
"latencyMs": 3143.9853749999893
},
{
"questionId": "q75",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 6,
"latencyMs": 1177.0768329999992
},
{
"questionId": "q75",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "1066",
"actual": "1066.0",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 6,
"latencyMs": 1535.377165999962
},
{
"questionId": "q76",
"format": "json",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 328,
"latencyMs": 10990.360375000047
},
{
"questionId": "q76",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1467.304375000007
},
{
"questionId": "q76",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 1,
"latencyMs": 1316.8680830000085
},
{
"questionId": "q76",
"format": "toon",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 392,
"latencyMs": 4399.92220900004
},
{
"questionId": "q76",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1077.4348749999772
},
{
"questionId": "q76",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 1,
"latencyMs": 1317.501791000017
},
{
"questionId": "q76",
"format": "csv",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 200,
"latencyMs": 4153.370333999977
},
{
"questionId": "q76",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1147.2140420000069
},
{
"questionId": "q76",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 1,
"latencyMs": 1243.451000000001
},
{
"questionId": "q76",
"format": "xml",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 328,
"latencyMs": 7804.228665999952
},
{
"questionId": "q76",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1144.1722500000033
},
{
"questionId": "q76",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 1,
"latencyMs": 857.7333750000107
},
{
"questionId": "q76",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 136,
"latencyMs": 2287.29574999999
},
{
"questionId": "q76",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1285.9760839999653
},
{
"questionId": "q76",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "cancelled",
"actual": "cancelled",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 1,
"latencyMs": 1174.2349580000155
},
{
"questionId": "q77",
"format": "json",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 266,
"latencyMs": 4109.542333999998
},
{
"questionId": "q77",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1433.0992499999702
},
{
"questionId": "q77",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 6,
"latencyMs": 3301.268875000009
},
{
"questionId": "q77",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 394,
"latencyMs": 4952.654542000033
},
{
"questionId": "q77",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1165.5959999999614
},
{
"questionId": "q77",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 6,
"latencyMs": 982.1686660000123
},
{
"questionId": "q77",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 266,
"latencyMs": 4735.772292000009
},
{
"questionId": "q77",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1361.5435829999624
},
{
"questionId": "q77",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 6,
"latencyMs": 2838.4672920000157
},
{
"questionId": "q77",
"format": "xml",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 394,
"latencyMs": 4771.182459000032
},
{
"questionId": "q77",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 8,
"latencyMs": 1202.4828330000164
},
{
"questionId": "q77",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 6,
"latencyMs": 1063.3247500000289
},
{
"questionId": "q77",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 202,
"latencyMs": 7751.146624999994
},
{
"questionId": "q77",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1352.936708000023
},
{
"questionId": "q77",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "1697.4",
"actual": "1697.4",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 6,
"latencyMs": 3135.286582999979
},
{
"questionId": "q78",
"format": "json",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 264,
"latencyMs": 3105.402541999996
},
{
"questionId": "q78",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1140.6077500000247
},
{
"questionId": "q78",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 12112,
"outputTokens": 1,
"latencyMs": 1257.6969169999938
},
{
"questionId": "q78",
"format": "toon",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 72,
"latencyMs": 2142.8472499999916
},
{
"questionId": "q78",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1485.6063330000034
},
{
"questionId": "q78",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 1,
"latencyMs": 1350.4362079999992
},
{
"questionId": "q78",
"format": "csv",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 264,
"latencyMs": 3870.94754199998
},
{
"questionId": "q78",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1153.2942499999772
},
{
"questionId": "q78",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 1,
"latencyMs": 2935.8738330000197
},
{
"questionId": "q78",
"format": "xml",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 328,
"latencyMs": 4063.2786669999477
},
{
"questionId": "q78",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 13379,
"outputTokens": 4,
"latencyMs": 1202.6428329999908
},
{
"questionId": "q78",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 1,
"latencyMs": 1221.4335410000058
},
{
"questionId": "q78",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 200,
"latencyMs": 5382.740458999993
},
{
"questionId": "q78",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1434.1426659999997
},
{
"questionId": "q78",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "delivered",
"actual": "delivered",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 1,
"latencyMs": 1046.8339999999735
},
{
"questionId": "q79",
"format": "json",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 73,
"latencyMs": 2607.845874999999
},
{
"questionId": "q79",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1676.4270830000169
},
{
"questionId": "q79",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 3,
"latencyMs": 1219.0042910000193
},
{
"questionId": "q79",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 137,
"latencyMs": 3378.1006669999915
},
{
"questionId": "q79",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1979.5205839999835
},
{
"questionId": "q79",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 3,
"latencyMs": 1439.3422910000081
},
{
"questionId": "q79",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 137,
"latencyMs": 2889.578749999986
},
{
"questionId": "q79",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1190.1848750000354
},
{
"questionId": "q79",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 3,
"latencyMs": 2444.884665999969
},
{
"questionId": "q79",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 73,
"latencyMs": 2360.869958999974
},
{
"questionId": "q79",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 9,
"latencyMs": 1299.0499999999884
},
{
"questionId": "q79",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 3,
"latencyMs": 932.0124589999905
},
{
"questionId": "q79",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 73,
"latencyMs": 3092.9805410000263
},
{
"questionId": "q79",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1872.3574159999844
},
{
"questionId": "q79",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 3,
"latencyMs": 1216.4535000000033
},
{
"questionId": "q80",
"format": "json",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 138,
"latencyMs": 2404.87479099998
},
{
"questionId": "q80",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 2182.619249999989
},
{
"questionId": "q80",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 3,
"latencyMs": 1508.2469580000034
},
{
"questionId": "q80",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 138,
"latencyMs": 3670.61050000001
},
{
"questionId": "q80",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1291.4328749999986
},
{
"questionId": "q80",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 3,
"latencyMs": 1201.7425829999847
},
{
"questionId": "q80",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 202,
"latencyMs": 4846.332458000048
},
{
"questionId": "q80",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1134.4527920000255
},
{
"questionId": "q80",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 3,
"latencyMs": 2760.9979579999927
},
{
"questionId": "q80",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 138,
"latencyMs": 4943.049208999961
},
{
"questionId": "q80",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 9,
"latencyMs": 1163.70645899995
},
{
"questionId": "q80",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 3,
"latencyMs": 2088.2969169999706
},
{
"questionId": "q80",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 74,
"latencyMs": 1973.243833000015
},
{
"questionId": "q80",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1430.9339170000167
},
{
"questionId": "q80",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 3,
"latencyMs": 1687.4137919999775
},
{
"questionId": "q81",
"format": "json",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 203,
"latencyMs": 3178.392749999999
},
{
"questionId": "q81",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1213.1997499999707
},
{
"questionId": "q81",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 4,
"latencyMs": 1591.6145830000169
},
{
"questionId": "q81",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 203,
"latencyMs": 3938.462541999994
},
{
"questionId": "q81",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1552.203542000032
},
{
"questionId": "q81",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 4,
"latencyMs": 1499.0997919999645
},
{
"questionId": "q81",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 203,
"latencyMs": 5183.275583000039
},
{
"questionId": "q81",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1740.2195410000277
},
{
"questionId": "q81",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 4,
"latencyMs": 3886.555624999979
},
{
"questionId": "q81",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 203,
"latencyMs": 6655.238542000006
},
{
"questionId": "q81",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 10,
"latencyMs": 1357.9108329999726
},
{
"questionId": "q81",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 4,
"latencyMs": 1344.8635829999694
},
{
"questionId": "q81",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 139,
"latencyMs": 10553.66091700003
},
{
"questionId": "q81",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1807.1954169999808
},
{
"questionId": "q81",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 4,
"latencyMs": 2490.0647499999614
},
{
"questionId": "q82",
"format": "json",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 138,
"latencyMs": 4916.117375000031
},
{
"questionId": "q82",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1074.780374999973
},
{
"questionId": "q82",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 4,
"latencyMs": 1412.95891700004
},
{
"questionId": "q82",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 138,
"latencyMs": 2372.7108339999686
},
{
"questionId": "q82",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1261.033374999999
},
{
"questionId": "q82",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 4,
"latencyMs": 1507.3635420000064
},
{
"questionId": "q82",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 266,
"latencyMs": 4028.793000000005
},
{
"questionId": "q82",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1685.5001250000205
},
{
"questionId": "q82",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 4,
"latencyMs": 4534.999041000032
},
{
"questionId": "q82",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 202,
"latencyMs": 3417.137708000024
},
{
"questionId": "q82",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 10,
"latencyMs": 1361.4405830000178
},
{
"questionId": "q82",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 4,
"latencyMs": 2432.530415999994
},
{
"questionId": "q82",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 202,
"latencyMs": 5838.863542000006
},
{
"questionId": "q82",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1243.5272090000217
},
{
"questionId": "q82",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 4,
"latencyMs": 3514.3164579999866
},
{
"questionId": "q83",
"format": "json",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 202,
"latencyMs": 6595.4543330000015
},
{
"questionId": "q83",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1498.3081660000025
},
{
"questionId": "q83",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 4,
"latencyMs": 2013.447125000006
},
{
"questionId": "q83",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 202,
"latencyMs": 3336.2056250000023
},
{
"questionId": "q83",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1070.626500000013
},
{
"questionId": "q83",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 4,
"latencyMs": 1394.0314590000198
},
{
"questionId": "q83",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 266,
"latencyMs": 4194.179917000001
},
{
"questionId": "q83",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1139.8458330000285
},
{
"questionId": "q83",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 4,
"latencyMs": 3437.878625000012
},
{
"questionId": "q83",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 458,
"latencyMs": 13446.595333000005
},
{
"questionId": "q83",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 2680.581542
},
{
"questionId": "q83",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 4,
"latencyMs": 1203.1962920000078
},
{
"questionId": "q83",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 138,
"latencyMs": 4011.303083000006
},
{
"questionId": "q83",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1039.7921659999993
},
{
"questionId": "q83",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 4,
"latencyMs": 2480.1701660000253
},
{
"questionId": "q84",
"format": "json",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 136,
"latencyMs": 4735.566333000024
},
{
"questionId": "q84",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1280.546875
},
{
"questionId": "q84",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 2,
"latencyMs": 1865.3758329999982
},
{
"questionId": "q84",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 200,
"latencyMs": 2902.7560829999857
},
{
"questionId": "q84",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1081.401291999966
},
{
"questionId": "q84",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 2,
"latencyMs": 1030.250207999954
},
{
"questionId": "q84",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 264,
"latencyMs": 3382.8625409999513
},
{
"questionId": "q84",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1059.5115829999559
},
{
"questionId": "q84",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 2,
"latencyMs": 4047.5788749999483
},
{
"questionId": "q84",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 264,
"latencyMs": 4623.2353329999605
},
{
"questionId": "q84",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1069.810291999951
},
{
"questionId": "q84",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 2,
"latencyMs": 1081.8097089999937
},
{
"questionId": "q84",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 264,
"latencyMs": 8454.222833000007
},
{
"questionId": "q84",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1248.3214579999913
},
{
"questionId": "q84",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 2,
"latencyMs": 3052.669667000009
},
{
"questionId": "q85",
"format": "json",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 139,
"latencyMs": 6477.822083999985
},
{
"questionId": "q85",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1177.795124999946
},
{
"questionId": "q85",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 4,
"latencyMs": 2578.6090829999885
},
{
"questionId": "q85",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 139,
"latencyMs": 11574.13941599999
},
{
"questionId": "q85",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1197.251500000013
},
{
"questionId": "q85",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 4,
"latencyMs": 902.3842500000028
},
{
"questionId": "q85",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 267,
"latencyMs": 5139.725291999988
},
{
"questionId": "q85",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1539.0101670000004
},
{
"questionId": "q85",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 4,
"latencyMs": 5590.813292000035
},
{
"questionId": "q85",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 459,
"latencyMs": 5332.691916999989
},
{
"questionId": "q85",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 9,
"latencyMs": 1692.4654169999994
},
{
"questionId": "q85",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 4,
"latencyMs": 981.0666250000359
},
{
"questionId": "q85",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 331,
"latencyMs": 4571.373957999982
},
{
"questionId": "q85",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1186.5836659999914
},
{
"questionId": "q85",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 4,
"latencyMs": 3083.60266699997
},
{
"questionId": "q86",
"format": "json",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 203,
"latencyMs": 6090.284833999991
},
{
"questionId": "q86",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1271.532459000009
},
{
"questionId": "q86",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 5,
"latencyMs": 1557.2529580000555
},
{
"questionId": "q86",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 203,
"latencyMs": 3250.3466250000056
},
{
"questionId": "q86",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1201.9044580000336
},
{
"questionId": "q86",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 5,
"latencyMs": 874.0206250000047
},
{
"questionId": "q86",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 203,
"latencyMs": 9473.656583999982
},
{
"questionId": "q86",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1253.2470420000027
},
{
"questionId": "q86",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 5,
"latencyMs": 2383.5771250000107
},
{
"questionId": "q86",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 267,
"latencyMs": 6551.133333000005
},
{
"questionId": "q86",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 9,
"latencyMs": 1116.6841669999994
},
{
"questionId": "q86",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 5,
"latencyMs": 2014.7545000000391
},
{
"questionId": "q86",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 75,
"latencyMs": 2472.76654099999
},
{
"questionId": "q86",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1175.5650410000235
},
{
"questionId": "q86",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 5,
"latencyMs": 1389.2444590000086
},
{
"questionId": "q87",
"format": "json",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 266,
"latencyMs": 4308.579541000014
},
{
"questionId": "q87",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1423.6036659999518
},
{
"questionId": "q87",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 3,
"latencyMs": 2240.639916999964
},
{
"questionId": "q87",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 202,
"latencyMs": 3581.8104590000003
},
{
"questionId": "q87",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1104.380625000049
},
{
"questionId": "q87",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 3,
"latencyMs": 1940.0862910000142
},
{
"questionId": "q87",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 202,
"latencyMs": 4205.585124999983
},
{
"questionId": "q87",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1249.4729159999988
},
{
"questionId": "q87",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 3,
"latencyMs": 3377.5699580000364
},
{
"questionId": "q87",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 266,
"latencyMs": 4378.770917000016
},
{
"questionId": "q87",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 7,
"latencyMs": 1283.0947499999893
},
{
"questionId": "q87",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 3,
"latencyMs": 1649.8935409999685
},
{
"questionId": "q87",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 138,
"latencyMs": 4596.174417000031
},
{
"questionId": "q87",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1117.4153749999823
},
{
"questionId": "q87",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 3,
"latencyMs": 2916.328375000041
},
{
"questionId": "q88",
"format": "json",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 202,
"latencyMs": 6150.88295900001
},
{
"questionId": "q88",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 3154.254249999998
},
{
"questionId": "q88",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 4,
"latencyMs": 1595.2374999999884
},
{
"questionId": "q88",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 138,
"latencyMs": 2656.5287499999977
},
{
"questionId": "q88",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1990.0005419999943
},
{
"questionId": "q88",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 4,
"latencyMs": 2321.1809169999906
},
{
"questionId": "q88",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 266,
"latencyMs": 3915.817207999993
},
{
"questionId": "q88",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1246.5829580000136
},
{
"questionId": "q88",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 4,
"latencyMs": 4516.533583000011
},
{
"questionId": "q88",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 202,
"latencyMs": 5059.808416999993
},
{
"questionId": "q88",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 9,
"latencyMs": 1927.3214579999913
},
{
"questionId": "q88",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 4,
"latencyMs": 1175.4753750000382
},
{
"questionId": "q88",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 138,
"latencyMs": 6212.469625000027
},
{
"questionId": "q88",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1526.3683329999913
},
{
"questionId": "q88",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 4,
"latencyMs": 3560.557833000028
},
{
"questionId": "q89",
"format": "json",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 331,
"latencyMs": 4333.316457999987
},
{
"questionId": "q89",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1150.7639999999665
},
{
"questionId": "q89",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 4,
"latencyMs": 2529.932083999971
},
{
"questionId": "q89",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 203,
"latencyMs": 3581.042041000037
},
{
"questionId": "q89",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1568.8872919999994
},
{
"questionId": "q89",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 4,
"latencyMs": 1319.7952499999665
},
{
"questionId": "q89",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 331,
"latencyMs": 3538.970499999996
},
{
"questionId": "q89",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1241.5265000000363
},
{
"questionId": "q89",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 4,
"latencyMs": 3917.9875000000466
},
{
"questionId": "q89",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 395,
"latencyMs": 7058.911167000013
},
{
"questionId": "q89",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 10,
"latencyMs": 1205.0128329999861
},
{
"questionId": "q89",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 4,
"latencyMs": 1415.7616670000134
},
{
"questionId": "q89",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 139,
"latencyMs": 2635.5764160000253
},
{
"questionId": "q89",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1153.0579160000198
},
{
"questionId": "q89",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 4,
"latencyMs": 2894.0762920000125
},
{
"questionId": "q90",
"format": "json",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 140,
"latencyMs": 6845.755584000028
},
{
"questionId": "q90",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 2363.831957999966
},
{
"questionId": "q90",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 5,
"latencyMs": 2646.4628749999683
},
{
"questionId": "q90",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 140,
"latencyMs": 2236.9238749999786
},
{
"questionId": "q90",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1023.8160830000415
},
{
"questionId": "q90",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 5,
"latencyMs": 1165.2285000000265
},
{
"questionId": "q90",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 268,
"latencyMs": 4066.1428750000196
},
{
"questionId": "q90",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1570.4565409999923
},
{
"questionId": "q90",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 5,
"latencyMs": 3472.6348330000183
},
{
"questionId": "q90",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 268,
"latencyMs": 3361.3982500000275
},
{
"questionId": "q90",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 10,
"latencyMs": 1247.454334000009
},
{
"questionId": "q90",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 5,
"latencyMs": 1382.5874590000021
},
{
"questionId": "q90",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 140,
"latencyMs": 2949.110708000022
},
{
"questionId": "q90",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1160.699499999988
},
{
"questionId": "q90",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 5,
"latencyMs": 3016.852790999983
},
{
"questionId": "q91",
"format": "json",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 73,
"latencyMs": 2769.32262500003
},
{
"questionId": "q91",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 5,
"latencyMs": 1252.1112919999869
},
{
"questionId": "q91",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 2,
"latencyMs": 1906.2817499999655
},
{
"questionId": "q91",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 201,
"latencyMs": 5391.403708000027
},
{
"questionId": "q91",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 5,
"latencyMs": 1126.4195000000182
},
{
"questionId": "q91",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 2,
"latencyMs": 1148.1653749999823
},
{
"questionId": "q91",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 265,
"latencyMs": 3649.6608329999726
},
{
"questionId": "q91",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 5,
"latencyMs": 1054.9641670000274
},
{
"questionId": "q91",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 2,
"latencyMs": 4520.085083000013
},
{
"questionId": "q91",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 137,
"latencyMs": 3783.5575830000453
},
{
"questionId": "q91",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 5,
"latencyMs": 1200.0155000000377
},
{
"questionId": "q91",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 2,
"latencyMs": 1914.0702499999898
},
{
"questionId": "q91",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 265,
"latencyMs": 8789.486250000016
},
{
"questionId": "q91",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 5,
"latencyMs": 1445.0254999999888
},
{
"questionId": "q91",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 2,
"latencyMs": 3330.7725830000127
},
{
"questionId": "q92",
"format": "json",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 201,
"latencyMs": 6413.151542000007
},
{
"questionId": "q92",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1204.1578749999753
},
{
"questionId": "q92",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 3,
"latencyMs": 1412.2799170000362
},
{
"questionId": "q92",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 137,
"latencyMs": 2630.434041999979
},
{
"questionId": "q92",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1546.8669579999987
},
{
"questionId": "q92",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 3,
"latencyMs": 2373.892125000013
},
{
"questionId": "q92",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 201,
"latencyMs": 3202.2820420000353
},
{
"questionId": "q92",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1227.2948330000509
},
{
"questionId": "q92",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 3,
"latencyMs": 3743.526792000048
},
{
"questionId": "q92",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 201,
"latencyMs": 3238.171458000026
},
{
"questionId": "q92",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 8,
"latencyMs": 1180.7857080000103
},
{
"questionId": "q92",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 3,
"latencyMs": 1142.4927089999546
},
{
"questionId": "q92",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 137,
"latencyMs": 3021.9724590000114
},
{
"questionId": "q92",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1821.3516250000102
},
{
"questionId": "q92",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 3,
"latencyMs": 2796.1425000000163
},
{
"questionId": "q93",
"format": "json",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 9739,
"outputTokens": 138,
"latencyMs": 2788.065082999994
},
{
"questionId": "q93",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1367.4712089999812
},
{
"questionId": "q93",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 12113,
"outputTokens": 4,
"latencyMs": 1443.3402910000295
},
{
"questionId": "q93",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 6013,
"outputTokens": 202,
"latencyMs": 3654.0896250000224
},
{
"questionId": "q93",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1028.997875000001
},
{
"questionId": "q93",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 7201,
"outputTokens": 4,
"latencyMs": 996.1445419999654
},
{
"questionId": "q93",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 6781,
"outputTokens": 266,
"latencyMs": 6677.9684579999885
},
{
"questionId": "q93",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1639.9640409999993
},
{
"questionId": "q93",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 7838,
"outputTokens": 4,
"latencyMs": 1652.2167079999927
},
{
"questionId": "q93",
"format": "xml",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 11037,
"outputTokens": 202,
"latencyMs": 3802.7754580000183
},
{
"questionId": "q93",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 13380,
"outputTokens": 8,
"latencyMs": 3327.393792000017
},
{
"questionId": "q93",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 13451,
"outputTokens": 4,
"latencyMs": 1257.9510420000297
},
{
"questionId": "q93",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 7373,
"outputTokens": 202,
"latencyMs": 3074.6058750000084
},
{
"questionId": "q93",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1146.4290829999954
},
{
"questionId": "q93",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"isCorrect": true,
"inputTokens": 8427,
"outputTokens": 4,
"latencyMs": 1712.0292920000502
},
{
"questionId": "q94",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 9735,
"outputTokens": 967,
"latencyMs": 11158.31029200001
},
{
"questionId": "q94",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1969.3274160000146
},
{
"questionId": "q94",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "9",
"isCorrect": false,
"inputTokens": 12107,
"outputTokens": 1,
"latencyMs": 1012.6363329999731
},
{
"questionId": "q94",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6009,
"outputTokens": 839,
"latencyMs": 12387.267332999967
},
{
"questionId": "q94",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1146.578125
},
{
"questionId": "q94",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7195,
"outputTokens": 2,
"latencyMs": 6065.854290999996
},
{
"questionId": "q94",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6777,
"outputTokens": 583,
"latencyMs": 5722.737124999985
},
{
"questionId": "q94",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1162.2037910000072
},
{
"questionId": "q94",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7832,
"outputTokens": 2,
"latencyMs": 5346.4215829999885
},
{
"questionId": "q94",
"format": "xml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 11033,
"outputTokens": 967,
"latencyMs": 9711.181042000011
},
{
"questionId": "q94",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 13375,
"outputTokens": 5,
"latencyMs": 1180.9850839999854
},
{
"questionId": "q94",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 13445,
"outputTokens": 2,
"latencyMs": 6629.622541000019
},
{
"questionId": "q94",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7369,
"outputTokens": 583,
"latencyMs": 5019.671374999976
},
{
"questionId": "q94",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1167.7568749999627
},
{
"questionId": "q94",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "9",
"isCorrect": false,
"inputTokens": 8421,
"outputTokens": 1,
"latencyMs": 1625.168708000041
},
{
"questionId": "q95",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 9735,
"outputTokens": 775,
"latencyMs": 7411.724082999979
},
{
"questionId": "q95",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1554.4648750000051
},
{
"questionId": "q95",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 12107,
"outputTokens": 2,
"latencyMs": 2038.4110000000219
},
{
"questionId": "q95",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6009,
"outputTokens": 455,
"latencyMs": 8813.801208000048
},
{
"questionId": "q95",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"isCorrect": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1344.8304580000113
},
{
"questionId": "q95",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "9",
"isCorrect": false,
"inputTokens": 7195,
"outputTokens": 1,
"latencyMs": 795.6426249999786
},
{
"questionId": "q95",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6777,
"outputTokens": 903,
"latencyMs": 9739.22754199995
},
{
"questionId": "q95",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1163.627124999999
},
{
"questionId": "q95",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7832,
"outputTokens": 2,
"latencyMs": 4444.457624999981
},
{
"questionId": "q95",
"format": "xml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 11033,
"outputTokens": 1415,
"latencyMs": 14405.558917000017
},
{
"questionId": "q95",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 13375,
"outputTokens": 5,
"latencyMs": 1603.5181249999441
},
{
"questionId": "q95",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "9",
"isCorrect": false,
"inputTokens": 13445,
"outputTokens": 1,
"latencyMs": 1466.009625000006
},
{
"questionId": "q95",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7369,
"outputTokens": 583,
"latencyMs": 50147.72520799999
},
{
"questionId": "q95",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1600.4076660000137
},
{
"questionId": "q95",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "9",
"isCorrect": false,
"inputTokens": 8421,
"outputTokens": 1,
"latencyMs": 1974.6425419999869
},
{
"questionId": "q96",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 9736,
"outputTokens": 839,
"latencyMs": 6029.78350000002
},
{
"questionId": "q96",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1108.4398330000113
},
{
"questionId": "q96",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 12108,
"outputTokens": 1,
"latencyMs": 1581.965291999979
},
{
"questionId": "q96",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6010,
"outputTokens": 647,
"latencyMs": 21748.776332999987
},
{
"questionId": "q96",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"isCorrect": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 2333.9817080000066
},
{
"questionId": "q96",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 7196,
"outputTokens": 1,
"latencyMs": 1115.266958000022
},
{
"questionId": "q96",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6778,
"outputTokens": 583,
"latencyMs": 5761.870166999986
},
{
"questionId": "q96",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1110.2957919999608
},
{
"questionId": "q96",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7833,
"outputTokens": 2,
"latencyMs": 5206.065542000055
},
{
"questionId": "q96",
"format": "xml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 11034,
"outputTokens": 839,
"latencyMs": 10213.124458000006
},
{
"questionId": "q96",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 13375,
"outputTokens": 5,
"latencyMs": 1085.2472919999855
},
{
"questionId": "q96",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 13446,
"outputTokens": 2,
"latencyMs": 6148.1957500000135
},
{
"questionId": "q96",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7370,
"outputTokens": 647,
"latencyMs": 10606.282000000007
},
{
"questionId": "q96",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"isCorrect": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1061.5612079999992
},
{
"questionId": "q96",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 8422,
"outputTokens": 1,
"latencyMs": 940.8403330000001
},
{
"questionId": "q97",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 9736,
"outputTokens": 647,
"latencyMs": 6429.81362500001
},
{
"questionId": "q97",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1373.5127499999944
},
{
"questionId": "q97",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "9",
"isCorrect": false,
"inputTokens": 12107,
"outputTokens": 1,
"latencyMs": 1618.8752080000122
},
{
"questionId": "q97",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6010,
"outputTokens": 583,
"latencyMs": 5288.105207999994
},
{
"questionId": "q97",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 974.4008749999921
},
{
"questionId": "q97",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7195,
"outputTokens": 2,
"latencyMs": 994.4026250000461
},
{
"questionId": "q97",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6778,
"outputTokens": 1479,
"latencyMs": 44513.282000000065
},
{
"questionId": "q97",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1579.2647080000024
},
{
"questionId": "q97",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7832,
"outputTokens": 2,
"latencyMs": 6760.291374999972
},
{
"questionId": "q97",
"format": "xml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 11034,
"outputTokens": 647,
"latencyMs": 6886.205707999994
},
{
"questionId": "q97",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 13375,
"outputTokens": 5,
"latencyMs": 1140.8538749999716
},
{
"questionId": "q97",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 13445,
"outputTokens": 2,
"latencyMs": 5500.930916999932
},
{
"questionId": "q97",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7370,
"outputTokens": 647,
"latencyMs": 6873.12387499999
},
{
"questionId": "q97",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "9",
"isCorrect": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1385.4246660000063
},
{
"questionId": "q97",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "9",
"isCorrect": false,
"inputTokens": 8421,
"outputTokens": 1,
"latencyMs": 1070.8007499999949
},
{
"questionId": "q98",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 9736,
"outputTokens": 775,
"latencyMs": 10215.419124999957
},
{
"questionId": "q98",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1169.6882500000065
},
{
"questionId": "q98",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 12107,
"outputTokens": 2,
"latencyMs": 1497.445791999984
},
{
"questionId": "q98",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6010,
"outputTokens": 583,
"latencyMs": 17780.296249999956
},
{
"questionId": "q98",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1507.771624999994
},
{
"questionId": "q98",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7195,
"outputTokens": 2,
"latencyMs": 1089.9117079999996
},
{
"questionId": "q98",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 6778,
"outputTokens": 583,
"latencyMs": 6443.644124999992
},
{
"questionId": "q98",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1212.1155410000356
},
{
"questionId": "q98",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7832,
"outputTokens": 2,
"latencyMs": 5152.548582999967
},
{
"questionId": "q98",
"format": "xml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 11034,
"outputTokens": 647,
"latencyMs": 12689.804665999953
},
{
"questionId": "q98",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 13375,
"outputTokens": 5,
"latencyMs": 1122.1935420000227
},
{
"questionId": "q98",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 13445,
"outputTokens": 2,
"latencyMs": 1011.1309159999946
},
{
"questionId": "q98",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 7370,
"outputTokens": 711,
"latencyMs": 9792.569583000033
},
{
"questionId": "q98",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"isCorrect": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1111.848708000034
},
{
"questionId": "q98",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "10",
"actual": "10",
"isCorrect": true,
"inputTokens": 8421,
"outputTokens": 2,
"latencyMs": 868.7284579999978
},
{
"questionId": "q99",
"format": "json",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "41304.82",
"isCorrect": false,
"inputTokens": 9736,
"outputTokens": 2698,
"latencyMs": 46504.10175000003
},
{
"questionId": "q99",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "50,847.47",
"isCorrect": false,
"inputTokens": 11902,
"outputTokens": 9,
"latencyMs": 1987.3346250000177
},
{
"questionId": "q99",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "42342.25",
"actual": "40000.00",
"isCorrect": false,
"inputTokens": 12108,
"outputTokens": 8,
"latencyMs": 7707.775332999998
},
{
"questionId": "q99",
"format": "toon",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "42342.25",
"isCorrect": true,
"inputTokens": 6010,
"outputTokens": 5578,
"latencyMs": 48586.554000000004
},
{
"questionId": "q99",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "41,847.47",
"isCorrect": false,
"inputTokens": 6988,
"outputTokens": 9,
"latencyMs": 3438.9107920000097
},
{
"questionId": "q99",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "42342.25",
"actual": "40000.00",
"isCorrect": false,
"inputTokens": 7196,
"outputTokens": 8,
"latencyMs": 6512.329665999976
},
{
"questionId": "q99",
"format": "csv",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "42342.25",
"isCorrect": true,
"inputTokens": 6778,
"outputTokens": 4874,
"latencyMs": 37911.18645799998
},
{
"questionId": "q99",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "48,847.47",
"isCorrect": false,
"inputTokens": 8409,
"outputTokens": 9,
"latencyMs": 1071.3846250000643
},
{
"questionId": "q99",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "42342.25",
"actual": "40000.00",
"isCorrect": false,
"inputTokens": 7833,
"outputTokens": 8,
"latencyMs": 7891.89620800002
},
{
"questionId": "q99",
"format": "xml",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "42342.25",
"isCorrect": true,
"inputTokens": 11034,
"outputTokens": 3338,
"latencyMs": 23923.247208000044
},
{
"questionId": "q99",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "47,847.47",
"isCorrect": false,
"inputTokens": 13375,
"outputTokens": 9,
"latencyMs": 1182.405207999982
},
{
"questionId": "q99",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "42342.25",
"actual": "43000.00",
"isCorrect": false,
"inputTokens": 13446,
"outputTokens": 8,
"latencyMs": 9388.739500000025
},
{
"questionId": "q99",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "42342.25",
"isCorrect": true,
"inputTokens": 7370,
"outputTokens": 3082,
"latencyMs": 31024.954041999998
},
{
"questionId": "q99",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "47,847.89",
"isCorrect": false,
"inputTokens": 8380,
"outputTokens": 9,
"latencyMs": 1240.8969590000343
},
{
"questionId": "q99",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "42342.25",
"actual": "30900.09",
"isCorrect": false,
"inputTokens": 8422,
"outputTokens": 8,
"latencyMs": 2345.1206249999814
},
{
"questionId": "q100",
"format": "json",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 2567,
"latencyMs": 53935.78729200002
},
{
"questionId": "q100",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "48",
"isCorrect": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1066.0944579999195
},
{
"questionId": "q100",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "44",
"actual": "45",
"isCorrect": false,
"inputTokens": 12112,
"outputTokens": 2,
"latencyMs": 1494.8697500000708
},
{
"questionId": "q100",
"format": "toon",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 1351,
"latencyMs": 14949.407374999952
},
{
"questionId": "q100",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "47",
"isCorrect": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 967.8411250000354
},
{
"questionId": "q100",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "44",
"actual": "44",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 2,
"latencyMs": 12734.97745799995
},
{
"questionId": "q100",
"format": "csv",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 1607,
"latencyMs": 15572.392542000045
},
{
"questionId": "q100",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "48",
"isCorrect": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 2052.4572499999776
},
{
"questionId": "q100",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "44",
"actual": "44",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 2,
"latencyMs": 13219.975833000033
},
{
"questionId": "q100",
"format": "xml",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 1735,
"latencyMs": 69773.56662499998
},
{
"questionId": "q100",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "45",
"isCorrect": false,
"inputTokens": 13377,
"outputTokens": 5,
"latencyMs": 1719.8178329999791
},
{
"questionId": "q100",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "44",
"actual": "44",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 2,
"latencyMs": 11322.527541999938
},
{
"questionId": "q100",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 1607,
"latencyMs": 20736.131416000077
},
{
"questionId": "q100",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "45",
"isCorrect": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 1052.186207999941
},
{
"questionId": "q100",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "44",
"actual": "47",
"isCorrect": false,
"inputTokens": 8426,
"outputTokens": 2,
"latencyMs": 1184.4893750000047
},
{
"questionId": "q101",
"format": "json",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 967,
"latencyMs": 12279.209374999977
},
{
"questionId": "q101",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"isCorrect": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1297.988250000053
},
{
"questionId": "q101",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "39",
"actual": "45",
"isCorrect": false,
"inputTokens": 12112,
"outputTokens": 2,
"latencyMs": 1760.7460000000428
},
{
"questionId": "q101",
"format": "toon",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 1351,
"latencyMs": 10500.295707999961
},
{
"questionId": "q101",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"isCorrect": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 1138.843208999955
},
{
"questionId": "q101",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 7200,
"outputTokens": 2,
"latencyMs": 9441.675416999962
},
{
"questionId": "q101",
"format": "csv",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 1863,
"latencyMs": 19287.06454199995
},
{
"questionId": "q101",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"isCorrect": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 1490.810999999987
},
{
"questionId": "q101",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 2,
"latencyMs": 12331.178375000018
},
{
"questionId": "q101",
"format": "xml",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 3335,
"latencyMs": 26443.42041599995
},
{
"questionId": "q101",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"isCorrect": false,
"inputTokens": 13377,
"outputTokens": 5,
"latencyMs": 1419.3634590000147
},
{
"questionId": "q101",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 2,
"latencyMs": 11403.771042000037
},
{
"questionId": "q101",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 7372,
"outputTokens": 1671,
"latencyMs": 14214.94204200001
},
{
"questionId": "q101",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"isCorrect": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 1183.1556669999845
},
{
"questionId": "q101",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "39",
"actual": "39",
"isCorrect": true,
"inputTokens": 8426,
"outputTokens": 2,
"latencyMs": 12192.347249999992
},
{
"questionId": "q102",
"format": "json",
"model": "gpt-5-nano",
"expected": "32",
"actual": "32",
"isCorrect": true,
"inputTokens": 9738,
"outputTokens": 2311,
"latencyMs": 286602.893667
},
{
"questionId": "q102",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"isCorrect": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1132.721833000076
},
{
"questionId": "q102",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "32",
"actual": "37",
"isCorrect": false,
"inputTokens": 12112,
"outputTokens": 2,
"latencyMs": 1632.5237090000883
},
{
"questionId": "q102",
"format": "toon",
"model": "gpt-5-nano",
"expected": "32",
"actual": "32",
"isCorrect": true,
"inputTokens": 6012,
"outputTokens": 839,
"latencyMs": 12142.227125000092
},
{
"questionId": "q102",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "26",
"isCorrect": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 1184.7071669999277
},
{
"questionId": "q102",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "32",
"actual": "37",
"isCorrect": false,
"inputTokens": 7200,
"outputTokens": 2,
"latencyMs": 1000.1081669999985
},
{
"questionId": "q102",
"format": "csv",
"model": "gpt-5-nano",
"expected": "32",
"actual": "32",
"isCorrect": true,
"inputTokens": 6780,
"outputTokens": 1287,
"latencyMs": 45846.97675000003
},
{
"questionId": "q102",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"isCorrect": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 1744.5200829999521
},
{
"questionId": "q102",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "32",
"actual": "32",
"isCorrect": true,
"inputTokens": 7837,
"outputTokens": 2,
"latencyMs": 12398.869249999989
},
{
"questionId": "q102",
"format": "xml",
"model": "gpt-5-nano",
"expected": "32",
"actual": "32",
"isCorrect": true,
"inputTokens": 11036,
"outputTokens": 1351,
"latencyMs": 12448.268124999944
},
{
"questionId": "q102",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"isCorrect": false,
"inputTokens": 13377,
"outputTokens": 5,
"latencyMs": 1155.887459000107
},
{
"questionId": "q102",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "32",
"actual": "32",
"isCorrect": true,
"inputTokens": 13450,
"outputTokens": 2,
"latencyMs": 12662.306666000048
},
{
"questionId": "q102",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "32",
"actual": "31",
"isCorrect": false,
"inputTokens": 7372,
"outputTokens": 1799,
"latencyMs": 15611.27658299997
},
{
"questionId": "q102",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "26",
"isCorrect": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 1592.5243330000667
},
{
"questionId": "q102",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "32",
"actual": "37",
"isCorrect": false,
"inputTokens": 8426,
"outputTokens": 2,
"latencyMs": 1257.715124999988
},
{
"questionId": "q103",
"format": "json",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 72,
"latencyMs": 1883.4624169999734
},
{
"questionId": "q103",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1072.3808749999152
},
{
"questionId": "q103",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 2622.4323750000913
},
{
"questionId": "q103",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 15307.557292000041
},
{
"questionId": "q103",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1084.2609999999404
},
{
"questionId": "q103",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 2758.0986669999547
},
{
"questionId": "q103",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 72,
"latencyMs": 1854.1639169999398
},
{
"questionId": "q103",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 948.2132079999428
},
{
"questionId": "q103",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 2243.337582999957
},
{
"questionId": "q103",
"format": "xml",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 200,
"latencyMs": 4750.478917
},
{
"questionId": "q103",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1168.2797080000164
},
{
"questionId": "q103",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 1235.7723750000587
},
{
"questionId": "q103",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 72,
"latencyMs": 4593.343416000018
},
{
"questionId": "q103",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1005.8936250000261
},
{
"questionId": "q103",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "6975",
"actual": "6975",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1302.4004580000183
},
{
"questionId": "q104",
"format": "json",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 10838.235042000073
},
{
"questionId": "q104",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1148.390958999982
},
{
"questionId": "q104",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 7,
"latencyMs": 2339.6254999999655
},
{
"questionId": "q104",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 138,
"latencyMs": 7077.6732909999555
},
{
"questionId": "q104",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1064.9028750000289
},
{
"questionId": "q104",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 7,
"latencyMs": 2335.216167000006
},
{
"questionId": "q104",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 74,
"latencyMs": 5253.633124999935
},
{
"questionId": "q104",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1438.5572920000413
},
{
"questionId": "q104",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 7,
"latencyMs": 1807.325458999956
},
{
"questionId": "q104",
"format": "xml",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 138,
"latencyMs": 3436.290666999994
},
{
"questionId": "q104",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1125.5812910000095
},
{
"questionId": "q104",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 7,
"latencyMs": 984.154334000079
},
{
"questionId": "q104",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 4561.665000000037
},
{
"questionId": "q104",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1273.080958000035
},
{
"questionId": "q104",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "6686.23",
"actual": "6686.23",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 7,
"latencyMs": 1065.2617909999099
},
{
"questionId": "q105",
"format": "json",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 200,
"latencyMs": 3926.1200409999583
},
{
"questionId": "q105",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1170.2935419999994
},
{
"questionId": "q105",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 2907.920374999987
},
{
"questionId": "q105",
"format": "toon",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 6013.766874999972
},
{
"questionId": "q105",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1029.452791999909
},
{
"questionId": "q105",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 1767.9035409999778
},
{
"questionId": "q105",
"format": "csv",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 200,
"latencyMs": 2931.0335839999607
},
{
"questionId": "q105",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 857.5665409999201
},
{
"questionId": "q105",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 1870.161458000075
},
{
"questionId": "q105",
"format": "xml",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 136,
"latencyMs": 2792.1963339999784
},
{
"questionId": "q105",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1112.5085419999668
},
{
"questionId": "q105",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 2572.699583999929
},
{
"questionId": "q105",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 3129.4847079999745
},
{
"questionId": "q105",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 2352.252790999948
},
{
"questionId": "q105",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "7500",
"actual": "7500",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1623.8393749999814
},
{
"questionId": "q106",
"format": "json",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 74,
"latencyMs": 5410.545292000053
},
{
"questionId": "q106",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1382.8987500000512
},
{
"questionId": "q106",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 8,
"latencyMs": 2918.163458999945
},
{
"questionId": "q106",
"format": "toon",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 138,
"latencyMs": 2478.2083329999587
},
{
"questionId": "q106",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1265.4150420000078
},
{
"questionId": "q106",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 8,
"latencyMs": 1943.8234170000069
},
{
"questionId": "q106",
"format": "csv",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 4516.7844160000095
},
{
"questionId": "q106",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1502.5052920000162
},
{
"questionId": "q106",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 8,
"latencyMs": 2691.783666000003
},
{
"questionId": "q106",
"format": "xml",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 138,
"latencyMs": 4047.482250000001
},
{
"questionId": "q106",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1547.010666999966
},
{
"questionId": "q106",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 8,
"latencyMs": 1679.222165999934
},
{
"questionId": "q106",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 202,
"latencyMs": 4740.509624999948
},
{
"questionId": "q106",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1271.0033330000006
},
{
"questionId": "q106",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "14297.05",
"actual": "14297.05",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 8,
"latencyMs": 2636.093916999991
},
{
"questionId": "q107",
"format": "json",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 72,
"latencyMs": 8298.315874999971
},
{
"questionId": "q107",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1520.9959589999635
},
{
"questionId": "q107",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 2487.122250000015
},
{
"questionId": "q107",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 2142.1067079999484
},
{
"questionId": "q107",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1108.5955839999951
},
{
"questionId": "q107",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 2469.1304579999996
},
{
"questionId": "q107",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 136,
"latencyMs": 2567.9449590001022
},
{
"questionId": "q107",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1078.092707999982
},
{
"questionId": "q107",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 1809.784708000021
},
{
"questionId": "q107",
"format": "xml",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 200,
"latencyMs": 2525.847415999975
},
{
"questionId": "q107",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1085.6306249999907
},
{
"questionId": "q107",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 2901.1133329999866
},
{
"questionId": "q107",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 200,
"latencyMs": 3336.295124999946
},
{
"questionId": "q107",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1092.8172920000507
},
{
"questionId": "q107",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "6692",
"actual": "6692",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1070.4765419999603
},
{
"questionId": "q108",
"format": "json",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 74,
"latencyMs": 4454.346332999994
},
{
"questionId": "q108",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1455.8378749999683
},
{
"questionId": "q108",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 7,
"latencyMs": 1775.3881249999395
},
{
"questionId": "q108",
"format": "toon",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 74,
"latencyMs": 3750.9490000000224
},
{
"questionId": "q108",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1294.0682909999741
},
{
"questionId": "q108",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 7,
"latencyMs": 2086.9909169999883
},
{
"questionId": "q108",
"format": "csv",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 2283.21883300005
},
{
"questionId": "q108",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 983.0039999999572
},
{
"questionId": "q108",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 7,
"latencyMs": 2159.7753329999978
},
{
"questionId": "q108",
"format": "xml",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 202,
"latencyMs": 6951.322584000067
},
{
"questionId": "q108",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1090.7049170000246
},
{
"questionId": "q108",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 7,
"latencyMs": 1449.565457999939
},
{
"questionId": "q108",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 3853.0687920000637
},
{
"questionId": "q108",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1126.2435420000693
},
{
"questionId": "q108",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "9302.76",
"actual": "9302.76",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 7,
"latencyMs": 1764.1200830000453
},
{
"questionId": "q109",
"format": "json",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 136,
"latencyMs": 3300.9657910000533
},
{
"questionId": "q109",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1052.1962920000078
},
{
"questionId": "q109",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 3287.65862500004
},
{
"questionId": "q109",
"format": "toon",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 200,
"latencyMs": 3891.706874999916
},
{
"questionId": "q109",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1081.2852920000441
},
{
"questionId": "q109",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 2226.4307500000577
},
{
"questionId": "q109",
"format": "csv",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 72,
"latencyMs": 1982.5622910000384
},
{
"questionId": "q109",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 929.4726250000531
},
{
"questionId": "q109",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 1787.2903330000117
},
{
"questionId": "q109",
"format": "xml",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 264,
"latencyMs": 3257.529749999987
},
{
"questionId": "q109",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1576.1779170000227
},
{
"questionId": "q109",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 2836.7503750000615
},
{
"questionId": "q109",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 4072.856582999928
},
{
"questionId": "q109",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 974.9362500000279
},
{
"questionId": "q109",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "3285",
"actual": "3285",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1213.922833000077
},
{
"questionId": "q110",
"format": "json",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 3493.7957090000855
},
{
"questionId": "q110",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1142.0260000000708
},
{
"questionId": "q110",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 7,
"latencyMs": 2381.430916000041
},
{
"questionId": "q110",
"format": "toon",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 138,
"latencyMs": 2413.9573330000276
},
{
"questionId": "q110",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1847.1221249999944
},
{
"questionId": "q110",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 7,
"latencyMs": 2303.37033299997
},
{
"questionId": "q110",
"format": "csv",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 2214.3459579999326
},
{
"questionId": "q110",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1087.8486249999842
},
{
"questionId": "q110",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 7,
"latencyMs": 1525.997917000088
},
{
"questionId": "q110",
"format": "xml",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 202,
"latencyMs": 2952.5206250000047
},
{
"questionId": "q110",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1203.7597079999978
},
{
"questionId": "q110",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 7,
"latencyMs": 1580.2738329999847
},
{
"questionId": "q110",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2473.919208999956
},
{
"questionId": "q110",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1452.058374999906
},
{
"questionId": "q110",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "3826.93",
"actual": "3826.93",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 7,
"latencyMs": 2691.815042000031
},
{
"questionId": "q111",
"format": "json",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 136,
"latencyMs": 2043.9027500000084
},
{
"questionId": "q111",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1085.5088339999784
},
{
"questionId": "q111",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 1648.2013329999754
},
{
"questionId": "q111",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 3078.3677920000628
},
{
"questionId": "q111",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 953.482166999951
},
{
"questionId": "q111",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 2107.5470000000205
},
{
"questionId": "q111",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 72,
"latencyMs": 2056.58216599992
},
{
"questionId": "q111",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1345.5024170000106
},
{
"questionId": "q111",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 1387.981958999997
},
{
"questionId": "q111",
"format": "xml",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 136,
"latencyMs": 3227.920458999928
},
{
"questionId": "q111",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1789.7077919999138
},
{
"questionId": "q111",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 3015.3227080000797
},
{
"questionId": "q111",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 200,
"latencyMs": 2481.5284170000814
},
{
"questionId": "q111",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 2319.2710829999996
},
{
"questionId": "q111",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "6191",
"actual": "6191",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1736.7912920000963
},
{
"questionId": "q112",
"format": "json",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 2613.5518750000047
},
{
"questionId": "q112",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1411.1959170000628
},
{
"questionId": "q112",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 7,
"latencyMs": 2631.1534589999355
},
{
"questionId": "q112",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 74,
"latencyMs": 2247.1309170000022
},
{
"questionId": "q112",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 935.4031660000328
},
{
"questionId": "q112",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 7,
"latencyMs": 3261.111125000054
},
{
"questionId": "q112",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 74,
"latencyMs": 2420.4490409999853
},
{
"questionId": "q112",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1112.1383340000175
},
{
"questionId": "q112",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 7,
"latencyMs": 2340.017957999953
},
{
"questionId": "q112",
"format": "xml",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 394,
"latencyMs": 17092.246334000025
},
{
"questionId": "q112",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1153.1710829999065
},
{
"questionId": "q112",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 7,
"latencyMs": 1490.9894589999458
},
{
"questionId": "q112",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 202,
"latencyMs": 3339.092583000078
},
{
"questionId": "q112",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1555.5642919999082
},
{
"questionId": "q112",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "1854.66",
"actual": "1854.66",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 7,
"latencyMs": 2120.2490830000024
},
{
"questionId": "q113",
"format": "json",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 200,
"latencyMs": 3111.5985420000507
},
{
"questionId": "q113",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 968.7054999999236
},
{
"questionId": "q113",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 3022.979249999975
},
{
"questionId": "q113",
"format": "toon",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 3835.2764579999493
},
{
"questionId": "q113",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1366.261957999901
},
{
"questionId": "q113",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 1964.8687499999069
},
{
"questionId": "q113",
"format": "csv",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 264,
"latencyMs": 3045.071499999962
},
{
"questionId": "q113",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 804.4215829999885
},
{
"questionId": "q113",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 1822.1931249999907
},
{
"questionId": "q113",
"format": "xml",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 136,
"latencyMs": 2214.7718329998897
},
{
"questionId": "q113",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1151.622665999923
},
{
"questionId": "q113",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 1762.1509579999838
},
{
"questionId": "q113",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 200,
"latencyMs": 2739.4318329999223
},
{
"questionId": "q113",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1074.2716670000227
},
{
"questionId": "q113",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "4696",
"actual": "4696",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1362.9514999999665
},
{
"questionId": "q114",
"format": "json",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 2877.9115410000086
},
{
"questionId": "q114",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1239.7438750000438
},
{
"questionId": "q114",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 6,
"latencyMs": 1514.1683330000378
},
{
"questionId": "q114",
"format": "toon",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 202,
"latencyMs": 2804.6751670000376
},
{
"questionId": "q114",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 979.8223330000183
},
{
"questionId": "q114",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 6,
"latencyMs": 2323.508334000013
},
{
"questionId": "q114",
"format": "csv",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 74,
"latencyMs": 1690.5704579999438
},
{
"questionId": "q114",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 886.4768329999642
},
{
"questionId": "q114",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 6,
"latencyMs": 1805.5540000000037
},
{
"questionId": "q114",
"format": "xml",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 266,
"latencyMs": 4743.464458000031
},
{
"questionId": "q114",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1165.764332999941
},
{
"questionId": "q114",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 6,
"latencyMs": 2148.3432500000345
},
{
"questionId": "q114",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2704.757041999954
},
{
"questionId": "q114",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1058.6455829999177
},
{
"questionId": "q114",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "4211.6",
"actual": "4211.6",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 6,
"latencyMs": 2256.7089169999817
},
{
"questionId": "q115",
"format": "json",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 136,
"latencyMs": 2360.8099159999983
},
{
"questionId": "q115",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1535.8384579999838
},
{
"questionId": "q115",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 3278.595083000022
},
{
"questionId": "q115",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 328,
"latencyMs": 7969.119124999968
},
{
"questionId": "q115",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1099.6044580000453
},
{
"questionId": "q115",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 2112.666833000025
},
{
"questionId": "q115",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 72,
"latencyMs": 1636.6678329999559
},
{
"questionId": "q115",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 902.907957999967
},
{
"questionId": "q115",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 1787.2734170000767
},
{
"questionId": "q115",
"format": "xml",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 264,
"latencyMs": 3207.286208000034
},
{
"questionId": "q115",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1176.4805000000633
},
{
"questionId": "q115",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 3314.0558330001077
},
{
"questionId": "q115",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 200,
"latencyMs": 5537.94308300002
},
{
"questionId": "q115",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 914.5840419998858
},
{
"questionId": "q115",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "6196",
"actual": "6196",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1747.4003750000848
},
{
"questionId": "q116",
"format": "json",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 202,
"latencyMs": 5452.725000000093
},
{
"questionId": "q116",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.30",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1257.8495419999817
},
{
"questionId": "q116",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 6,
"latencyMs": 1183.2777500000084
},
{
"questionId": "q116",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 330,
"latencyMs": 7140.693124999991
},
{
"questionId": "q116",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1131.5447919999715
},
{
"questionId": "q116",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 6,
"latencyMs": 2556.5294579999754
},
{
"questionId": "q116",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 266,
"latencyMs": 3158.0195420000236
},
{
"questionId": "q116",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 926.703375000041
},
{
"questionId": "q116",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 6,
"latencyMs": 2144.0341659999685
},
{
"questionId": "q116",
"format": "xml",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 202,
"latencyMs": 3109.7603749999544
},
{
"questionId": "q116",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.30",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1212.1927079999587
},
{
"questionId": "q116",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 6,
"latencyMs": 3449.487916999962
},
{
"questionId": "q116",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2570.9303749999963
},
{
"questionId": "q116",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1058.9517500000075
},
{
"questionId": "q116",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "6105.3",
"actual": "6105.3",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 6,
"latencyMs": 1379.4884169999277
},
{
"questionId": "q117",
"format": "json",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 200,
"latencyMs": 2630.738624999998
},
{
"questionId": "q117",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 884.325959000038
},
{
"questionId": "q117",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 2599.299457999994
},
{
"questionId": "q117",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 200,
"latencyMs": 5174.115041999961
},
{
"questionId": "q117",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1230.3996659999248
},
{
"questionId": "q117",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 2081.4514590000035
},
{
"questionId": "q117",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 456,
"latencyMs": 4708.666958000045
},
{
"questionId": "q117",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1065.470417000004
},
{
"questionId": "q117",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 1987.3131250001024
},
{
"questionId": "q117",
"format": "xml",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 200,
"latencyMs": 3420.324041999993
},
{
"questionId": "q117",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 897.2685829999391
},
{
"questionId": "q117",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 1442.7957500000484
},
{
"questionId": "q117",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 264,
"latencyMs": 3038.6226250000764
},
{
"questionId": "q117",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1260.5887920000823
},
{
"questionId": "q117",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "6528",
"actual": "6528",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1877.516042000032
},
{
"questionId": "q118",
"format": "json",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 266,
"latencyMs": 40974.3431249999
},
{
"questionId": "q118",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 867.1927500000456
},
{
"questionId": "q118",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 7,
"latencyMs": 3284.4902500000317
},
{
"questionId": "q118",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 586,
"latencyMs": 5396.599999999977
},
{
"questionId": "q118",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1174.796290999977
},
{
"questionId": "q118",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 7,
"latencyMs": 2751.699709000066
},
{
"questionId": "q118",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 3463.471459000022
},
{
"questionId": "q118",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 925.253083000076
},
{
"questionId": "q118",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 7,
"latencyMs": 3240.4625000000233
},
{
"questionId": "q118",
"format": "xml",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 138,
"latencyMs": 7405.421083000023
},
{
"questionId": "q118",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1061.0794160000514
},
{
"questionId": "q118",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 7,
"latencyMs": 1512.5596659999574
},
{
"questionId": "q118",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2445.1606250000186
},
{
"questionId": "q118",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1296.5266660000198
},
{
"questionId": "q118",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "1136.09",
"actual": "1136.09",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 7,
"latencyMs": 1523.473083000048
},
{
"questionId": "q119",
"format": "json",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 392,
"latencyMs": 4885.794165999978
},
{
"questionId": "q119",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 958.9109579999931
},
{
"questionId": "q119",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 2268.0900839999085
},
{
"questionId": "q119",
"format": "toon",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 648,
"latencyMs": 12410.339000000036
},
{
"questionId": "q119",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1124.1954169999808
},
{
"questionId": "q119",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 1842.937042000005
},
{
"questionId": "q119",
"format": "csv",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 200,
"latencyMs": 14746.862250000006
},
{
"questionId": "q119",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1070.885459000012
},
{
"questionId": "q119",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 2808.225791999954
},
{
"questionId": "q119",
"format": "xml",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 4423,
"outputTokens": 264,
"latencyMs": 2815.092042000033
},
{
"questionId": "q119",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1285.6015419999603
},
{
"questionId": "q119",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 1620.0065000000177
},
{
"questionId": "q119",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 3353.4782089999644
},
{
"questionId": "q119",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1281.6234170000535
},
{
"questionId": "q119",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "4689",
"actual": "4689",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1903.9000839999644
},
{
"questionId": "q120",
"format": "json",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 330,
"latencyMs": 3469.9373749999795
},
{
"questionId": "q120",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1129.299417000031
},
{
"questionId": "q120",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 7,
"latencyMs": 1843.423833000008
},
{
"questionId": "q120",
"format": "toon",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 74,
"latencyMs": 3029.9955000000773
},
{
"questionId": "q120",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 976.265458000009
},
{
"questionId": "q120",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 7,
"latencyMs": 1941.5176659999415
},
{
"questionId": "q120",
"format": "csv",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 2326.60387500003
},
{
"questionId": "q120",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1340.7505420000525
},
{
"questionId": "q120",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 7,
"latencyMs": 3061.3734159999294
},
{
"questionId": "q120",
"format": "xml",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 330,
"latencyMs": 18444.37216700008
},
{
"questionId": "q120",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1472.8980000000447
},
{
"questionId": "q120",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 7,
"latencyMs": 1203.1091250000754
},
{
"questionId": "q120",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 266,
"latencyMs": 6852.723041999969
},
{
"questionId": "q120",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1186.3190000000177
},
{
"questionId": "q120",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "2637.73",
"actual": "2637.73",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 7,
"latencyMs": 2720.8557080000173
},
{
"questionId": "q121",
"format": "json",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 3712,
"outputTokens": 200,
"latencyMs": 9941.250375000061
},
{
"questionId": "q121",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1254.0278750000289
},
{
"questionId": "q121",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 4784,
"outputTokens": 4,
"latencyMs": 3998.6611660000635
},
{
"questionId": "q121",
"format": "toon",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 1563,
"outputTokens": 72,
"latencyMs": 2154.672750000027
},
{
"questionId": "q121",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1019.1613750000251
},
{
"questionId": "q121",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 2271,
"outputTokens": 4,
"latencyMs": 1623.1509579999838
},
{
"questionId": "q121",
"format": "csv",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 1441,
"outputTokens": 200,
"latencyMs": 5643.6689169999445
},
{
"questionId": "q121",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 908.8649170000572
},
{
"questionId": "q121",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 2208,
"outputTokens": 4,
"latencyMs": 1939.4002079999773
},
{
"questionId": "q121",
"format": "xml",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "7409",
"isCorrect": false,
"inputTokens": 4423,
"outputTokens": 392,
"latencyMs": 18020.185499999905
},
{
"questionId": "q121",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 4787,
"outputTokens": 6,
"latencyMs": 1167.9574999999022
},
{
"questionId": "q121",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 5431,
"outputTokens": 4,
"latencyMs": 2516.0782500000205
},
{
"questionId": "q121",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 3538.66266599996
},
{
"questionId": "q121",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1074.641707999981
},
{
"questionId": "q121",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "5685",
"actual": "5685",
"isCorrect": true,
"inputTokens": 3814,
"outputTokens": 4,
"latencyMs": 1611.2575829999987
},
{
"questionId": "q122",
"format": "json",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 3711,
"outputTokens": 202,
"latencyMs": 3097.4197080000304
},
{
"questionId": "q122",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1068.923999999999
},
{
"questionId": "q122",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 4783,
"outputTokens": 7,
"latencyMs": 1952.0416250000708
},
{
"questionId": "q122",
"format": "toon",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 1562,
"outputTokens": 906,
"latencyMs": 11804.22670800006
},
{
"questionId": "q122",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1140.642707999912
},
{
"questionId": "q122",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 2270,
"outputTokens": 7,
"latencyMs": 3323.8447500000475
},
{
"questionId": "q122",
"format": "csv",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 1440,
"outputTokens": 202,
"latencyMs": 5759.3412499999395
},
{
"questionId": "q122",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1174.6347079999978
},
{
"questionId": "q122",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 2207,
"outputTokens": 7,
"latencyMs": 1816.737458000076
},
{
"questionId": "q122",
"format": "xml",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 4422,
"outputTokens": 138,
"latencyMs": 14154.70395799994
},
{
"questionId": "q122",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 4786,
"outputTokens": 8,
"latencyMs": 1000.3886250000214
},
{
"questionId": "q122",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 5430,
"outputTokens": 7,
"latencyMs": 1258.68512499996
},
{
"questionId": "q122",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 2984,
"outputTokens": 202,
"latencyMs": 2957.2190829999745
},
{
"questionId": "q122",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1128.0480420000385
},
{
"questionId": "q122",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "3421.06",
"actual": "3421.06",
"isCorrect": true,
"inputTokens": 3813,
"outputTokens": 7,
"latencyMs": 1714.4717499999097
},
{
"questionId": "q123",
"format": "json",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"isCorrect": true,
"inputTokens": 3709,
"outputTokens": 2632,
"latencyMs": 31555.039709000033
},
{
"questionId": "q123",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "188,945",
"isCorrect": false,
"inputTokens": 4077,
"outputTokens": 7,
"latencyMs": 1094.905458000023
},
{
"questionId": "q123",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "344498",
"actual": "340900",
"isCorrect": false,
"inputTokens": 4777,
"outputTokens": 6,
"latencyMs": 11993.166834000032
},
{
"questionId": "q123",
"format": "toon",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"isCorrect": true,
"inputTokens": 1560,
"outputTokens": 4360,
"latencyMs": 47190.18545800005
},
{
"questionId": "q123",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "337,045",
"isCorrect": false,
"inputTokens": 1506,
"outputTokens": 7,
"latencyMs": 1098.8443330000155
},
{
"questionId": "q123",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "344498",
"actual": "344900",
"isCorrect": false,
"inputTokens": 2264,
"outputTokens": 6,
"latencyMs": 5982.8935409999685
},
{
"questionId": "q123",
"format": "csv",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"isCorrect": true,
"inputTokens": 1438,
"outputTokens": 3080,
"latencyMs": 27390.594666999998
},
{
"questionId": "q123",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "372,915",
"isCorrect": false,
"inputTokens": 1442,
"outputTokens": 7,
"latencyMs": 1168.8217080000322
},
{
"questionId": "q123",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "344498",
"actual": "349900",
"isCorrect": false,
"inputTokens": 2201,
"outputTokens": 6,
"latencyMs": 5658.501500000013
},
{
"questionId": "q123",
"format": "xml",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"isCorrect": true,
"inputTokens": 4420,
"outputTokens": 3592,
"latencyMs": 25827.663583000074
},
{
"questionId": "q123",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "372,089",
"isCorrect": false,
"inputTokens": 4784,
"outputTokens": 7,
"latencyMs": 1297.9579999999842
},
{
"questionId": "q123",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "344498",
"actual": "340900",
"isCorrect": false,
"inputTokens": 5424,
"outputTokens": 6,
"latencyMs": 7942.432666000095
},
{
"questionId": "q123",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 3144,
"latencyMs": 26846.991665999987
},
{
"questionId": "q123",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "181,854",
"isCorrect": false,
"inputTokens": 3107,
"outputTokens": 7,
"latencyMs": 1012.253665999975
},
{
"questionId": "q123",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "344498",
"actual": "300900",
"isCorrect": false,
"inputTokens": 3807,
"outputTokens": 6,
"latencyMs": 1351.5872090000194
},
{
"questionId": "q124",
"format": "json",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"isCorrect": true,
"inputTokens": 3707,
"outputTokens": 4746,
"latencyMs": 38656.80637499993
},
{
"questionId": "q124",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "287,745.89",
"isCorrect": false,
"inputTokens": 4075,
"outputTokens": 9,
"latencyMs": 1336.5668340000557
},
{
"questionId": "q124",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "312818.50",
"actual": "300000.00",
"isCorrect": false,
"inputTokens": 4775,
"outputTokens": 9,
"latencyMs": 45570.00233399996
},
{
"questionId": "q124",
"format": "toon",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"isCorrect": true,
"inputTokens": 1558,
"outputTokens": 3594,
"latencyMs": 36589.136415999965
},
{
"questionId": "q124",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "487,891.45",
"isCorrect": false,
"inputTokens": 1504,
"outputTokens": 9,
"latencyMs": 1009.5284579999279
},
{
"questionId": "q124",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "312818.50",
"actual": "320000.00",
"isCorrect": false,
"inputTokens": 2262,
"outputTokens": 9,
"latencyMs": 11883.04608400003
},
{
"questionId": "q124",
"format": "csv",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"isCorrect": true,
"inputTokens": 1436,
"outputTokens": 3402,
"latencyMs": 209516.903208
},
{
"questionId": "q124",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "487,891.89",
"isCorrect": false,
"inputTokens": 1440,
"outputTokens": 9,
"latencyMs": 1453.1753339999123
},
{
"questionId": "q124",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "312818.50",
"actual": "329999.99",
"isCorrect": false,
"inputTokens": 2199,
"outputTokens": 9,
"latencyMs": 12329.097540999996
},
{
"questionId": "q124",
"format": "xml",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"isCorrect": true,
"inputTokens": 4418,
"outputTokens": 3274,
"latencyMs": 32337.936125000007
},
{
"questionId": "q124",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "381,847.89",
"isCorrect": false,
"inputTokens": 4782,
"outputTokens": 9,
"latencyMs": 990.2755830000388
},
{
"questionId": "q124",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "312818.50",
"actual": "300000.00",
"isCorrect": false,
"inputTokens": 5422,
"outputTokens": 9,
"latencyMs": 12093.661916999961
},
{
"questionId": "q124",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"isCorrect": true,
"inputTokens": 2980,
"outputTokens": 6730,
"latencyMs": 45238.25570800004
},
{
"questionId": "q124",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "381,847.89",
"isCorrect": false,
"inputTokens": 3105,
"outputTokens": 9,
"latencyMs": 1242.9971659999574
},
{
"questionId": "q124",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "312818.50",
"actual": "369000.00",
"isCorrect": false,
"inputTokens": 3805,
"outputTokens": 9,
"latencyMs": 1604.1214169999585
},
{
"questionId": "q125",
"format": "json",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 3709,
"outputTokens": 2184,
"latencyMs": 22585.809791999985
},
{
"questionId": "q125",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,234",
"isCorrect": false,
"inputTokens": 4078,
"outputTokens": 7,
"latencyMs": 1230.1040829999838
},
{
"questionId": "q125",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 4777,
"outputTokens": 4,
"latencyMs": 9357.454415999935
},
{
"questionId": "q125",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 1560,
"outputTokens": 2888,
"latencyMs": 19966.08491700003
},
{
"questionId": "q125",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,945",
"isCorrect": false,
"inputTokens": 1507,
"outputTokens": 7,
"latencyMs": 961.2437919999938
},
{
"questionId": "q125",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 2264,
"outputTokens": 4,
"latencyMs": 9139.956667000079
},
{
"questionId": "q125",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 1438,
"outputTokens": 2504,
"latencyMs": 21066.86054100003
},
{
"questionId": "q125",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,945",
"isCorrect": false,
"inputTokens": 1443,
"outputTokens": 7,
"latencyMs": 902.673208000022
},
{
"questionId": "q125",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 2201,
"outputTokens": 4,
"latencyMs": 7727.039290999994
},
{
"questionId": "q125",
"format": "xml",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 4420,
"outputTokens": 1864,
"latencyMs": 15644.210124999983
},
{
"questionId": "q125",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,532",
"isCorrect": false,
"inputTokens": 4785,
"outputTokens": 7,
"latencyMs": 1311.9297919999808
},
{
"questionId": "q125",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 5424,
"outputTokens": 4,
"latencyMs": 11031.984583999962
},
{
"questionId": "q125",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 1928,
"latencyMs": 26268.215167000075
},
{
"questionId": "q125",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,454",
"isCorrect": false,
"inputTokens": 3108,
"outputTokens": 7,
"latencyMs": 1283.3860000000568
},
{
"questionId": "q125",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "1811",
"actual": "1560",
"isCorrect": false,
"inputTokens": 3807,
"outputTokens": 4,
"latencyMs": 1390.9544999999925
},
{
"questionId": "q126",
"format": "json",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 3709,
"outputTokens": 1671,
"latencyMs": 18722.413541999995
},
{
"questionId": "q126",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 957.5536249999423
},
{
"questionId": "q126",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "42",
"actual": "47",
"isCorrect": false,
"inputTokens": 4779,
"outputTokens": 2,
"latencyMs": 1718.3615829999326
},
{
"questionId": "q126",
"format": "toon",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 1560,
"outputTokens": 2439,
"latencyMs": 20739.166833000025
},
{
"questionId": "q126",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1305.5439999999944
},
{
"questionId": "q126",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 2266,
"outputTokens": 2,
"latencyMs": 13351.089582999935
},
{
"questionId": "q126",
"format": "csv",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 1438,
"outputTokens": 2567,
"latencyMs": 23067.457167000044
},
{
"questionId": "q126",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1073.1606669999892
},
{
"questionId": "q126",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 2203,
"outputTokens": 2,
"latencyMs": 22770.808125000098
},
{
"questionId": "q126",
"format": "xml",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 4420,
"outputTokens": 2439,
"latencyMs": 28125.872208000044
},
{
"questionId": "q126",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "54",
"isCorrect": false,
"inputTokens": 4785,
"outputTokens": 5,
"latencyMs": 1046.3992919999873
},
{
"questionId": "q126",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 5426,
"outputTokens": 2,
"latencyMs": 12982.094000000041
},
{
"questionId": "q126",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 2631,
"latencyMs": 31181.451875000028
},
{
"questionId": "q126",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "47",
"isCorrect": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1418.826708000037
},
{
"questionId": "q126",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "42",
"actual": "49",
"isCorrect": false,
"inputTokens": 3809,
"outputTokens": 2,
"latencyMs": 2009.2083750000456
},
{
"questionId": "q127",
"format": "json",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 3709,
"outputTokens": 2503,
"latencyMs": 26827.34341699991
},
{
"questionId": "q127",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "24",
"isCorrect": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1093.9559999998892
},
{
"questionId": "q127",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 4779,
"outputTokens": 2,
"latencyMs": 18861.496042000013
},
{
"questionId": "q127",
"format": "toon",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 1560,
"outputTokens": 1799,
"latencyMs": 18378.229374999995
},
{
"questionId": "q127",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "26",
"isCorrect": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1111.1742920000106
},
{
"questionId": "q127",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 2266,
"outputTokens": 2,
"latencyMs": 12380.956957999966
},
{
"questionId": "q127",
"format": "csv",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 1438,
"outputTokens": 2055,
"latencyMs": 112325.29683300003
},
{
"questionId": "q127",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "23",
"isCorrect": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1231.2409169999883
},
{
"questionId": "q127",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 2203,
"outputTokens": 2,
"latencyMs": 20394.07720900001
},
{
"questionId": "q127",
"format": "xml",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 4420,
"outputTokens": 1799,
"latencyMs": 22818.38325000007
},
{
"questionId": "q127",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "24",
"isCorrect": false,
"inputTokens": 4785,
"outputTokens": 5,
"latencyMs": 1324.3675420000218
},
{
"questionId": "q127",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 5426,
"outputTokens": 2,
"latencyMs": 14308.32895799994
},
{
"questionId": "q127",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 2055,
"latencyMs": 22493.268166999915
},
{
"questionId": "q127",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "23",
"isCorrect": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1449.5348340000492
},
{
"questionId": "q127",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "28",
"actual": "31",
"isCorrect": false,
"inputTokens": 3809,
"outputTokens": 2,
"latencyMs": 1329.5626659999834
},
{
"questionId": "q128",
"format": "json",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 3709,
"outputTokens": 2183,
"latencyMs": 20410.59154199995
},
{
"questionId": "q128",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1137.8916250000475
},
{
"questionId": "q128",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 4779,
"outputTokens": 2,
"latencyMs": 15306.355875000008
},
{
"questionId": "q128",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 1560,
"outputTokens": 967,
"latencyMs": 9355.326041999971
},
{
"questionId": "q128",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "12",
"isCorrect": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 970.5706669999054
},
{
"questionId": "q128",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 2266,
"outputTokens": 2,
"latencyMs": 12738.58170900005
},
{
"questionId": "q128",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 1438,
"outputTokens": 1095,
"latencyMs": 11532.495875000022
},
{
"questionId": "q128",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1092.326875000028
},
{
"questionId": "q128",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 2203,
"outputTokens": 2,
"latencyMs": 9477.962708000094
},
{
"questionId": "q128",
"format": "xml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 4420,
"outputTokens": 1287,
"latencyMs": 12363.918167000054
},
{
"questionId": "q128",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 4785,
"outputTokens": 5,
"latencyMs": 1086.439250000054
},
{
"questionId": "q128",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 5426,
"outputTokens": 2,
"latencyMs": 13847.167500000098
},
{
"questionId": "q128",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 2982,
"outputTokens": 1607,
"latencyMs": 18025.304333999986
},
{
"questionId": "q128",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1525.7963329999475
},
{
"questionId": "q128",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "11",
"actual": "11",
"isCorrect": true,
"inputTokens": 3809,
"outputTokens": 2,
"latencyMs": 11297.281415999983
},
{
"questionId": "q129",
"format": "json",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"isCorrect": true,
"inputTokens": 3708,
"outputTokens": 1607,
"latencyMs": 16793.02033300011
},
{
"questionId": "q129",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "50",
"isCorrect": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1524.2867090000072
},
{
"questionId": "q129",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "58",
"actual": "58",
"isCorrect": true,
"inputTokens": 4777,
"outputTokens": 2,
"latencyMs": 20291.370166999986
},
{
"questionId": "q129",
"format": "toon",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"isCorrect": true,
"inputTokens": 1559,
"outputTokens": 2631,
"latencyMs": 31767.777667000075
},
{
"questionId": "q129",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "47",
"isCorrect": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1128.108874999918
},
{
"questionId": "q129",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "58",
"actual": "58",
"isCorrect": true,
"inputTokens": 2264,
"outputTokens": 2,
"latencyMs": 17774.151832999894
},
{
"questionId": "q129",
"format": "csv",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"isCorrect": true,
"inputTokens": 1437,
"outputTokens": 2887,
"latencyMs": 24058.048583999975
},
{
"questionId": "q129",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "54",
"isCorrect": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 833.2049999999581
},
{
"questionId": "q129",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "58",
"actual": "58",
"isCorrect": true,
"inputTokens": 2201,
"outputTokens": 2,
"latencyMs": 7901.533541000099
},
{
"questionId": "q129",
"format": "xml",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"isCorrect": true,
"inputTokens": 4419,
"outputTokens": 1415,
"latencyMs": 13345.296500000055
},
{
"questionId": "q129",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "54",
"isCorrect": false,
"inputTokens": 4785,
"outputTokens": 5,
"latencyMs": 1001.3450419999426
},
{
"questionId": "q129",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "58",
"actual": "55",
"isCorrect": false,
"inputTokens": 5424,
"outputTokens": 2,
"latencyMs": 2326.790707999957
},
{
"questionId": "q129",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 1287,
"latencyMs": 14444.245874999906
},
{
"questionId": "q129",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "54",
"isCorrect": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1060.1971249999478
},
{
"questionId": "q129",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "58",
"actual": "59",
"isCorrect": false,
"inputTokens": 3807,
"outputTokens": 2,
"latencyMs": 2816.4778749999823
},
{
"questionId": "q130",
"format": "json",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 3708,
"outputTokens": 3015,
"latencyMs": 190630.39133400004
},
{
"questionId": "q130",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"isCorrect": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 5375.239707999979
},
{
"questionId": "q130",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 4777,
"outputTokens": 2,
"latencyMs": 19789.381042000023
},
{
"questionId": "q130",
"format": "toon",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 1559,
"outputTokens": 2055,
"latencyMs": 16472.23841599992
},
{
"questionId": "q130",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "38",
"isCorrect": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1042.922583000036
},
{
"questionId": "q130",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 2264,
"outputTokens": 2,
"latencyMs": 13095.397083000047
},
{
"questionId": "q130",
"format": "csv",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 1437,
"outputTokens": 2311,
"latencyMs": 26893.475125000114
},
{
"questionId": "q130",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "38",
"isCorrect": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1042.875250000041
},
{
"questionId": "q130",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 2201,
"outputTokens": 2,
"latencyMs": 28097.87474999996
},
{
"questionId": "q130",
"format": "xml",
"model": "gpt-5-nano",
"expected": "41",
"actual": "42",
"isCorrect": false,
"inputTokens": 4419,
"outputTokens": 1735,
"latencyMs": 14091.963709000032
},
{
"questionId": "q130",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"isCorrect": false,
"inputTokens": 4785,
"outputTokens": 5,
"latencyMs": 1151.6397919999436
},
{
"questionId": "q130",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 5424,
"outputTokens": 2,
"latencyMs": 15769.612874999992
},
{
"questionId": "q130",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 1799,
"latencyMs": 18804.838290999993
},
{
"questionId": "q130",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"isCorrect": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1030.810417000088
},
{
"questionId": "q130",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "41",
"actual": "41",
"isCorrect": true,
"inputTokens": 3807,
"outputTokens": 2,
"latencyMs": 14482.474917000043
},
{
"questionId": "q131",
"format": "json",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 3708,
"outputTokens": 1351,
"latencyMs": 21887.844958
},
{
"questionId": "q131",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"isCorrect": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1332.5089160000207
},
{
"questionId": "q131",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 4777,
"outputTokens": 2,
"latencyMs": 17226.03358399996
},
{
"questionId": "q131",
"format": "toon",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 1559,
"outputTokens": 2055,
"latencyMs": 20772.763792000012
},
{
"questionId": "q131",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"isCorrect": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 966.6354170000413
},
{
"questionId": "q131",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 2264,
"outputTokens": 2,
"latencyMs": 10442.985291999998
},
{
"questionId": "q131",
"format": "csv",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 1437,
"outputTokens": 1095,
"latencyMs": 10072.030124999932
},
{
"questionId": "q131",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"isCorrect": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1233.0955420000246
},
{
"questionId": "q131",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 2201,
"outputTokens": 2,
"latencyMs": 18590.031917000073
},
{
"questionId": "q131",
"format": "xml",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 4419,
"outputTokens": 1735,
"latencyMs": 17035.41470799991
},
{
"questionId": "q131",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "21",
"isCorrect": false,
"inputTokens": 4785,
"outputTokens": 5,
"latencyMs": 994.0176249999786
},
{
"questionId": "q131",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 5424,
"outputTokens": 2,
"latencyMs": 12477.123250000062
},
{
"questionId": "q131",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 2981,
"outputTokens": 1479,
"latencyMs": 14346.053416999988
},
{
"questionId": "q131",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "21",
"isCorrect": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1269.5552920000628
},
{
"questionId": "q131",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "23",
"actual": "23",
"isCorrect": true,
"inputTokens": 3807,
"outputTokens": 2,
"latencyMs": 13739.479209000012
},
{
"questionId": "q132",
"format": "json",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 15187,
"outputTokens": 136,
"latencyMs": 3680.113916000002
},
{
"questionId": "q132",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 17409,
"outputTokens": 6,
"latencyMs": 1548.528917000047
},
{
"questionId": "q132",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 19991,
"outputTokens": 6,
"latencyMs": 1637.454792000004
},
{
"questionId": "q132",
"format": "toon",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 8788,
"outputTokens": 776,
"latencyMs": 8918.199665999971
},
{
"questionId": "q132",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 9279,
"outputTokens": 6,
"latencyMs": 1900.8446669999976
},
{
"questionId": "q132",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "430828",
"actual": "0",
"isCorrect": false,
"inputTokens": 12337,
"outputTokens": 1,
"latencyMs": 2677.7128749999683
},
{
"questionId": "q132",
"format": "csv",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 8556,
"outputTokens": 712,
"latencyMs": 10733.462500000023
},
{
"questionId": "q132",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 9125,
"outputTokens": 6,
"latencyMs": 1135.363000000012
},
{
"questionId": "q132",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 12207,
"outputTokens": 6,
"latencyMs": 1007.8897500000894
},
{
"questionId": "q132",
"format": "xml",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 17138,
"outputTokens": 328,
"latencyMs": 7708.789500000072
},
{
"questionId": "q132",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 19804,
"outputTokens": 6,
"latencyMs": 1477.8527500000782
},
{
"questionId": "q132",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 21881,
"outputTokens": 6,
"latencyMs": 2380.750500000082
},
{
"questionId": "q132",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 13171,
"outputTokens": 328,
"latencyMs": 9429.131750000059
},
{
"questionId": "q132",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 14483,
"outputTokens": 6,
"latencyMs": 1359.2385419999482
},
{
"questionId": "q132",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "430828",
"actual": "430828",
"isCorrect": true,
"inputTokens": 17076,
"outputTokens": 6,
"latencyMs": 1939.293042000034
},
{
"questionId": "q133",
"format": "json",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 15189,
"outputTokens": 392,
"latencyMs": 6479.065457999939
},
{
"questionId": "q133",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 17410,
"outputTokens": 6,
"latencyMs": 1155.017041999963
},
{
"questionId": "q133",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 19992,
"outputTokens": 5,
"latencyMs": 2049.621832999983
},
{
"questionId": "q133",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 8790,
"outputTokens": 648,
"latencyMs": 11672.019874999998
},
{
"questionId": "q133",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 9280,
"outputTokens": 6,
"latencyMs": 1597.3725000000559
},
{
"questionId": "q133",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "11798",
"actual": "0",
"isCorrect": false,
"inputTokens": 12338,
"outputTokens": 1,
"latencyMs": 11414.63520800008
},
{
"questionId": "q133",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 8558,
"outputTokens": 584,
"latencyMs": 15138.947667
},
{
"questionId": "q133",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 9126,
"outputTokens": 6,
"latencyMs": 1173.9259160000365
},
{
"questionId": "q133",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 12208,
"outputTokens": 5,
"latencyMs": 2788.6645000000717
},
{
"questionId": "q133",
"format": "xml",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 17140,
"outputTokens": 328,
"latencyMs": 4541.789875000017
},
{
"questionId": "q133",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 19805,
"outputTokens": 6,
"latencyMs": 1787.0144160001073
},
{
"questionId": "q133",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 21882,
"outputTokens": 5,
"latencyMs": 3930.188833000022
},
{
"questionId": "q133",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 13173,
"outputTokens": 264,
"latencyMs": 4459.655541999964
},
{
"questionId": "q133",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 14484,
"outputTokens": 6,
"latencyMs": 1239.003000000026
},
{
"questionId": "q133",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "11798",
"actual": "11798",
"isCorrect": true,
"inputTokens": 17077,
"outputTokens": 5,
"latencyMs": 4828.425707999966
},
{
"questionId": "q134",
"format": "json",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 15192,
"outputTokens": 200,
"latencyMs": 4039.568958000047
},
{
"questionId": "q134",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1455.9585000000661
},
{
"questionId": "q134",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 19995,
"outputTokens": 6,
"latencyMs": 1600.7708750000456
},
{
"questionId": "q134",
"format": "toon",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 8793,
"outputTokens": 456,
"latencyMs": 5973.896042000037
},
{
"questionId": "q134",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 2000.6470419999678
},
{
"questionId": "q134",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 12341,
"outputTokens": 6,
"latencyMs": 2543.431542000035
},
{
"questionId": "q134",
"format": "csv",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 8561,
"outputTokens": 648,
"latencyMs": 6973.037040999974
},
{
"questionId": "q134",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1655.6718330000294
},
{
"questionId": "q134",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 12211,
"outputTokens": 6,
"latencyMs": 2357.3444590000436
},
{
"questionId": "q134",
"format": "xml",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 17143,
"outputTokens": 392,
"latencyMs": 6136.790167000028
},
{
"questionId": "q134",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 19807,
"outputTokens": 6,
"latencyMs": 2510.24762499996
},
{
"questionId": "q134",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 21885,
"outputTokens": 6,
"latencyMs": 1737.0276670000749
},
{
"questionId": "q134",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 13176,
"outputTokens": 520,
"latencyMs": 5081.17487499991
},
{
"questionId": "q134",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1191.4632079999428
},
{
"questionId": "q134",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "183631",
"actual": "183631",
"isCorrect": true,
"inputTokens": 17080,
"outputTokens": 6,
"latencyMs": 1325.217249999987
},
{
"questionId": "q135",
"format": "json",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 15191,
"outputTokens": 328,
"latencyMs": 3314.1483749999898
},
{
"questionId": "q135",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1204.2171249999665
},
{
"questionId": "q135",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 19994,
"outputTokens": 5,
"latencyMs": 2558.019417000003
},
{
"questionId": "q135",
"format": "toon",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 8792,
"outputTokens": 968,
"latencyMs": 11319.296415999997
},
{
"questionId": "q135",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 1324.4548749999376
},
{
"questionId": "q135",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 12340,
"outputTokens": 5,
"latencyMs": 2740.4004170000553
},
{
"questionId": "q135",
"format": "csv",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 8560,
"outputTokens": 392,
"latencyMs": 7471.323291999986
},
{
"questionId": "q135",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1267.6016660000896
},
{
"questionId": "q135",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 12210,
"outputTokens": 5,
"latencyMs": 28672.12370799994
},
{
"questionId": "q135",
"format": "xml",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 17142,
"outputTokens": 392,
"latencyMs": 12836.502833000035
},
{
"questionId": "q135",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 19807,
"outputTokens": 6,
"latencyMs": 2346.9032910000533
},
{
"questionId": "q135",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 21884,
"outputTokens": 5,
"latencyMs": 2969.614082999993
},
{
"questionId": "q135",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 13175,
"outputTokens": 392,
"latencyMs": 5687.641541999998
},
{
"questionId": "q135",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1316.798792000045
},
{
"questionId": "q135",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "29246",
"actual": "29246",
"isCorrect": true,
"inputTokens": 17079,
"outputTokens": 5,
"latencyMs": 2823.280541000073
},
{
"questionId": "q136",
"format": "json",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 15187,
"outputTokens": 392,
"latencyMs": 5053.899791999953
},
{
"questionId": "q136",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 2537.008167000022
},
{
"questionId": "q136",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 19991,
"outputTokens": 6,
"latencyMs": 1954.4713340000017
},
{
"questionId": "q136",
"format": "toon",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 8788,
"outputTokens": 3208,
"latencyMs": 26572.223459
},
{
"questionId": "q136",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1112.2888329999987
},
{
"questionId": "q136",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 12337,
"outputTokens": 6,
"latencyMs": 2422.114500000025
},
{
"questionId": "q136",
"format": "csv",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 8556,
"outputTokens": 1352,
"latencyMs": 15821.266082999995
},
{
"questionId": "q136",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 1033.3786669999827
},
{
"questionId": "q136",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "135306",
"actual": "0",
"isCorrect": false,
"inputTokens": 12207,
"outputTokens": 1,
"latencyMs": 1657.3498749999562
},
{
"questionId": "q136",
"format": "xml",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 17138,
"outputTokens": 328,
"latencyMs": 4357.477583000087
},
{
"questionId": "q136",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 19802,
"outputTokens": 6,
"latencyMs": 1578.6591250000056
},
{
"questionId": "q136",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 21881,
"outputTokens": 6,
"latencyMs": 16684.568500000052
},
{
"questionId": "q136",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 13171,
"outputTokens": 712,
"latencyMs": 7845.738333999994
},
{
"questionId": "q136",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1408.234832999995
},
{
"questionId": "q136",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "135306",
"actual": "135306",
"isCorrect": true,
"inputTokens": 17076,
"outputTokens": 6,
"latencyMs": 3420.9656670000404
},
{
"questionId": "q137",
"format": "json",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "not found",
"isCorrect": false,
"inputTokens": 15186,
"outputTokens": 1608,
"latencyMs": 16271.314957999974
},
{
"questionId": "q137",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 1741.4425829999382
},
{
"questionId": "q137",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 19991,
"outputTokens": 5,
"latencyMs": 4409.774542000028
},
{
"questionId": "q137",
"format": "toon",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 8787,
"outputTokens": 1736,
"latencyMs": 16616.36137499998
},
{
"questionId": "q137",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1489.443333000061
},
{
"questionId": "q137",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "24914",
"actual": "0",
"isCorrect": false,
"inputTokens": 12337,
"outputTokens": 1,
"latencyMs": 2424.8680840000743
},
{
"questionId": "q137",
"format": "csv",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 8555,
"outputTokens": 2952,
"latencyMs": 26078.49774999998
},
{
"questionId": "q137",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1111.9479170000413
},
{
"questionId": "q137",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 12207,
"outputTokens": 5,
"latencyMs": 2661.1345420000143
},
{
"questionId": "q137",
"format": "xml",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "not found",
"isCorrect": false,
"inputTokens": 17137,
"outputTokens": 3464,
"latencyMs": 36029.06325000001
},
{
"questionId": "q137",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 19803,
"outputTokens": 6,
"latencyMs": 1756.511334000039
},
{
"questionId": "q137",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 21881,
"outputTokens": 5,
"latencyMs": 1706.1073340000585
},
{
"questionId": "q137",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 13170,
"outputTokens": 968,
"latencyMs": 8245.267290999996
},
{
"questionId": "q137",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1405.9593330000062
},
{
"questionId": "q137",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "24914",
"actual": "24914",
"isCorrect": true,
"inputTokens": 17076,
"outputTokens": 5,
"latencyMs": 2634.141583000077
},
{
"questionId": "q138",
"format": "json",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 15186,
"outputTokens": 520,
"latencyMs": 6238.670834000106
},
{
"questionId": "q138",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 1915.2061669999966
},
{
"questionId": "q138",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "111683",
"actual": "0",
"isCorrect": false,
"inputTokens": 19990,
"outputTokens": 1,
"latencyMs": 15802.735749999993
},
{
"questionId": "q138",
"format": "toon",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 8787,
"outputTokens": 840,
"latencyMs": 9492.533834000002
},
{
"questionId": "q138",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1264.6480839999858
},
{
"questionId": "q138",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 12336,
"outputTokens": 6,
"latencyMs": 2581.858165999991
},
{
"questionId": "q138",
"format": "csv",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 8555,
"outputTokens": 1736,
"latencyMs": 20963.487291999976
},
{
"questionId": "q138",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 2031.7733340000268
},
{
"questionId": "q138",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 12206,
"outputTokens": 6,
"latencyMs": 2651.7060409999685
},
{
"questionId": "q138",
"format": "xml",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 17137,
"outputTokens": 520,
"latencyMs": 5960.176208000048
},
{
"questionId": "q138",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 19802,
"outputTokens": 6,
"latencyMs": 1636.6764170000097
},
{
"questionId": "q138",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 21880,
"outputTokens": 6,
"latencyMs": 1322.0868340000743
},
{
"questionId": "q138",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 13170,
"outputTokens": 264,
"latencyMs": 5836.014208000037
},
{
"questionId": "q138",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1280.6878750000615
},
{
"questionId": "q138",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "111683",
"actual": "111683",
"isCorrect": true,
"inputTokens": 17075,
"outputTokens": 6,
"latencyMs": 3788.612332999939
},
{
"questionId": "q139",
"format": "json",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 15193,
"outputTokens": 456,
"latencyMs": 6374.532041999977
},
{
"questionId": "q139",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1435.1170410000486
},
{
"questionId": "q139",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 19995,
"outputTokens": 5,
"latencyMs": 2480.6709170000395
},
{
"questionId": "q139",
"format": "toon",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 8794,
"outputTokens": 904,
"latencyMs": 10770.860708000022
},
{
"questionId": "q139",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 1362.2076670000097
},
{
"questionId": "q139",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 12341,
"outputTokens": 5,
"latencyMs": 1725.4546669999836
},
{
"questionId": "q139",
"format": "csv",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 8562,
"outputTokens": 776,
"latencyMs": 7485.538915999932
},
{
"questionId": "q139",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1517.6439580000006
},
{
"questionId": "q139",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 12211,
"outputTokens": 5,
"latencyMs": 3422.7879589999793
},
{
"questionId": "q139",
"format": "xml",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 17144,
"outputTokens": 456,
"latencyMs": 9032.850083000027
},
{
"questionId": "q139",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 19807,
"outputTokens": 6,
"latencyMs": 1400.4656250000698
},
{
"questionId": "q139",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 21885,
"outputTokens": 5,
"latencyMs": 1666.045665999991
},
{
"questionId": "q139",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 13177,
"outputTokens": 264,
"latencyMs": 3696.009834000026
},
{
"questionId": "q139",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1177.9945420000004
},
{
"questionId": "q139",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "13364",
"actual": "13364",
"isCorrect": true,
"inputTokens": 17080,
"outputTokens": 5,
"latencyMs": 1399.2657909999834
},
{
"questionId": "q140",
"format": "json",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 15185,
"outputTokens": 520,
"latencyMs": 8902.311666999944
},
{
"questionId": "q140",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1588.589624999906
},
{
"questionId": "q140",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 19989,
"outputTokens": 5,
"latencyMs": 2070.6354159999173
},
{
"questionId": "q140",
"format": "toon",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 8786,
"outputTokens": 1736,
"latencyMs": 19399.512374999933
},
{
"questionId": "q140",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1322.7961249999935
},
{
"questionId": "q140",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "98464",
"actual": "0",
"isCorrect": false,
"inputTokens": 12335,
"outputTokens": 1,
"latencyMs": 2467.938582999981
},
{
"questionId": "q140",
"format": "csv",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "Not found",
"isCorrect": false,
"inputTokens": 8554,
"outputTokens": 4808,
"latencyMs": 46970.624375000014
},
{
"questionId": "q140",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1310.4520839999896
},
{
"questionId": "q140",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "98464",
"actual": "0",
"isCorrect": false,
"inputTokens": 12205,
"outputTokens": 1,
"latencyMs": 3555.658332999912
},
{
"questionId": "q140",
"format": "xml",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "0",
"isCorrect": false,
"inputTokens": 17136,
"outputTokens": 1735,
"latencyMs": 16477.424583000015
},
{
"questionId": "q140",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 19800,
"outputTokens": 6,
"latencyMs": 1970.4299579999642
},
{
"questionId": "q140",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 21879,
"outputTokens": 5,
"latencyMs": 26671.477541
},
{
"questionId": "q140",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 13169,
"outputTokens": 1096,
"latencyMs": 10919.952667000005
},
{
"questionId": "q140",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"isCorrect": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1168.6287909999955
},
{
"questionId": "q140",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "98464",
"actual": "0",
"isCorrect": false,
"inputTokens": 17074,
"outputTokens": 1,
"latencyMs": 2765.029874999891
},
{
"questionId": "q141",
"format": "json",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 15187,
"outputTokens": 200,
"latencyMs": 6004.068291999982
},
{
"questionId": "q141",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 1499.0042079999112
},
{
"questionId": "q141",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 19991,
"outputTokens": 4,
"latencyMs": 2506.4855830000015
},
{
"questionId": "q141",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 8788,
"outputTokens": 1032,
"latencyMs": 16463.560791999917
},
{
"questionId": "q141",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1441.4096249999711
},
{
"questionId": "q141",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 12337,
"outputTokens": 4,
"latencyMs": 2663.2737919999054
},
{
"questionId": "q141",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 8556,
"outputTokens": 904,
"latencyMs": 9668.898624999914
},
{
"questionId": "q141",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1173.9928749999963
},
{
"questionId": "q141",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "6378",
"actual": "0",
"isCorrect": false,
"inputTokens": 12207,
"outputTokens": 1,
"latencyMs": 9857.754333000048
},
{
"questionId": "q141",
"format": "xml",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 17138,
"outputTokens": 392,
"latencyMs": 9638.438333999948
},
{
"questionId": "q141",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 19803,
"outputTokens": 6,
"latencyMs": 1636.777374999947
},
{
"questionId": "q141",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 21881,
"outputTokens": 4,
"latencyMs": 1841.5572499999544
},
{
"questionId": "q141",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 13171,
"outputTokens": 328,
"latencyMs": 5539.711917000008
},
{
"questionId": "q141",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1485.2025829999475
},
{
"questionId": "q141",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "6378",
"actual": "6378",
"isCorrect": true,
"inputTokens": 17076,
"outputTokens": 4,
"latencyMs": 1622.3209579999093
},
{
"questionId": "q142",
"format": "json",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 15189,
"outputTokens": 456,
"latencyMs": 5173.022708000033
},
{
"questionId": "q142",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 17409,
"outputTokens": 6,
"latencyMs": 1700.1781669999473
},
{
"questionId": "q142",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 19992,
"outputTokens": 6,
"latencyMs": 2883.810959000024
},
{
"questionId": "q142",
"format": "toon",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 8790,
"outputTokens": 1352,
"latencyMs": 14519.361791000003
},
{
"questionId": "q142",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 9279,
"outputTokens": 6,
"latencyMs": 1391.6377499999944
},
{
"questionId": "q142",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 12338,
"outputTokens": 6,
"latencyMs": 2150.8105409999844
},
{
"questionId": "q142",
"format": "csv",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 8558,
"outputTokens": 968,
"latencyMs": 12890.400166000007
},
{
"questionId": "q142",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 9125,
"outputTokens": 6,
"latencyMs": 1352.297750000027
},
{
"questionId": "q142",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 12208,
"outputTokens": 6,
"latencyMs": 3035.361290999921
},
{
"questionId": "q142",
"format": "xml",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 17140,
"outputTokens": 648,
"latencyMs": 26188.04208299995
},
{
"questionId": "q142",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 19804,
"outputTokens": 6,
"latencyMs": 1935.45787500008
},
{
"questionId": "q142",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 21882,
"outputTokens": 6,
"latencyMs": 5415.2192920000525
},
{
"questionId": "q142",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 13173,
"outputTokens": 648,
"latencyMs": 6512.995166999986
},
{
"questionId": "q142",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 14483,
"outputTokens": 6,
"latencyMs": 1957.1825840000529
},
{
"questionId": "q142",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "254916",
"actual": "254916",
"isCorrect": true,
"inputTokens": 17077,
"outputTokens": 6,
"latencyMs": 1273.1987079998944
},
{
"questionId": "q143",
"format": "json",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 15187,
"outputTokens": 712,
"latencyMs": 7402.821666999953
},
{
"questionId": "q143",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 17410,
"outputTokens": 6,
"latencyMs": 1297.3980420000153
},
{
"questionId": "q143",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 19993,
"outputTokens": 5,
"latencyMs": 1398.1769159999676
},
{
"questionId": "q143",
"format": "toon",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 8788,
"outputTokens": 520,
"latencyMs": 8047.9024590000045
},
{
"questionId": "q143",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 9280,
"outputTokens": 6,
"latencyMs": 1149.3695000000298
},
{
"questionId": "q143",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 12339,
"outputTokens": 5,
"latencyMs": 3275.751125000068
},
{
"questionId": "q143",
"format": "csv",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 8556,
"outputTokens": 520,
"latencyMs": 10626.252958000056
},
{
"questionId": "q143",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 9126,
"outputTokens": 6,
"latencyMs": 1084.1253329999745
},
{
"questionId": "q143",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 12209,
"outputTokens": 5,
"latencyMs": 2478.551666000043
},
{
"questionId": "q143",
"format": "xml",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "43222",
"isCorrect": false,
"inputTokens": 17138,
"outputTokens": 2248,
"latencyMs": 24645.130125000025
},
{
"questionId": "q143",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 19805,
"outputTokens": 6,
"latencyMs": 1504.6681670000544
},
{
"questionId": "q143",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 21883,
"outputTokens": 5,
"latencyMs": 1577.2633330000099
},
{
"questionId": "q143",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 13171,
"outputTokens": 776,
"latencyMs": 8342.271167000057
},
{
"questionId": "q143",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 14484,
"outputTokens": 6,
"latencyMs": 1397.2225839999737
},
{
"questionId": "q143",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "32413",
"actual": "32413",
"isCorrect": true,
"inputTokens": 17078,
"outputTokens": 5,
"latencyMs": 2600.8139589999337
},
{
"questionId": "q144",
"format": "json",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 15185,
"outputTokens": 648,
"latencyMs": 10642.901458999957
},
{
"questionId": "q144",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1309.3054169999668
},
{
"questionId": "q144",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 19989,
"outputTokens": 6,
"latencyMs": 1797.455083000008
},
{
"questionId": "q144",
"format": "toon",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 8786,
"outputTokens": 1096,
"latencyMs": 11485.876249999972
},
{
"questionId": "q144",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1909.1485000000102
},
{
"questionId": "q144",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "240059",
"actual": "0",
"isCorrect": false,
"inputTokens": 12335,
"outputTokens": 1,
"latencyMs": 2114.457832999993
},
{
"questionId": "q144",
"format": "csv",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "Not found",
"isCorrect": false,
"inputTokens": 8554,
"outputTokens": 2760,
"latencyMs": 36680.54220799997
},
{
"questionId": "q144",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1069.4299589999719
},
{
"questionId": "q144",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 12205,
"outputTokens": 6,
"latencyMs": 2047.3995000000577
},
{
"questionId": "q144",
"format": "xml",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 17136,
"outputTokens": 456,
"latencyMs": 8763.321875000023
},
{
"questionId": "q144",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 19800,
"outputTokens": 6,
"latencyMs": 1591.410208000103
},
{
"questionId": "q144",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 21879,
"outputTokens": 6,
"latencyMs": 1814.5240000000922
},
{
"questionId": "q144",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "0",
"isCorrect": false,
"inputTokens": 13169,
"outputTokens": 2951,
"latencyMs": 28527.662250000052
},
{
"questionId": "q144",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1341.8624169999966
},
{
"questionId": "q144",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "240059",
"actual": "240059",
"isCorrect": true,
"inputTokens": 17074,
"outputTokens": 6,
"latencyMs": 2672.0011249999516
},
{
"questionId": "q145",
"format": "json",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 15186,
"outputTokens": 1288,
"latencyMs": 11650.464916000026
},
{
"questionId": "q145",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 17406,
"outputTokens": 6,
"latencyMs": 1736.123957999982
},
{
"questionId": "q145",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 19989,
"outputTokens": 5,
"latencyMs": 2115.1809580000117
},
{
"questionId": "q145",
"format": "toon",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "undefined",
"isCorrect": false,
"inputTokens": 8787,
"outputTokens": 2119,
"latencyMs": 22429.965708000003
},
{
"questionId": "q145",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 9276,
"outputTokens": 6,
"latencyMs": 1280.45074999996
},
{
"questionId": "q145",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "48986",
"actual": "0",
"isCorrect": false,
"inputTokens": 12335,
"outputTokens": 1,
"latencyMs": 2039.6975419999799
},
{
"questionId": "q145",
"format": "csv",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 8555,
"outputTokens": 1352,
"latencyMs": 13713.023125000065
},
{
"questionId": "q145",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 9122,
"outputTokens": 6,
"latencyMs": 1190.7314999999944
},
{
"questionId": "q145",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "48986",
"actual": "None",
"isCorrect": false,
"inputTokens": 12205,
"outputTokens": 1,
"latencyMs": 3054.557584000053
},
{
"questionId": "q145",
"format": "xml",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 17137,
"outputTokens": 456,
"latencyMs": 8163.3440420000115
},
{
"questionId": "q145",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 19801,
"outputTokens": 6,
"latencyMs": 2508.831208000076
},
{
"questionId": "q145",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "48986",
"actual": "0",
"isCorrect": false,
"inputTokens": 21879,
"outputTokens": 1,
"latencyMs": 13907.184875000035
},
{
"questionId": "q145",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 13170,
"outputTokens": 968,
"latencyMs": 9999.614625000046
},
{
"questionId": "q145",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 14480,
"outputTokens": 6,
"latencyMs": 1401.668834000011
},
{
"questionId": "q145",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "48986",
"actual": "48986",
"isCorrect": true,
"inputTokens": 17074,
"outputTokens": 5,
"latencyMs": 3342.504416999989
},
{
"questionId": "q146",
"format": "json",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "0",
"isCorrect": false,
"inputTokens": 15185,
"outputTokens": 1607,
"latencyMs": 14253.204374999972
},
{
"questionId": "q146",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1633.1817499999888
},
{
"questionId": "q146",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 19989,
"outputTokens": 6,
"latencyMs": 4013.2274579999503
},
{
"questionId": "q146",
"format": "toon",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 8786,
"outputTokens": 1864,
"latencyMs": 18068.214749999926
},
{
"questionId": "q146",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 2633.8406670000404
},
{
"questionId": "q146",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 12335,
"outputTokens": 6,
"latencyMs": 2308.719957999885
},
{
"questionId": "q146",
"format": "csv",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 8554,
"outputTokens": 3592,
"latencyMs": 34956.612250000006
},
{
"questionId": "q146",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1042.174875000026
},
{
"questionId": "q146",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "209624",
"actual": "Not found",
"isCorrect": false,
"inputTokens": 12205,
"outputTokens": 2,
"latencyMs": 3570.2167079999344
},
{
"questionId": "q146",
"format": "xml",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 17136,
"outputTokens": 584,
"latencyMs": 8155.267999999924
},
{
"questionId": "q146",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 19800,
"outputTokens": 6,
"latencyMs": 1908.0532499999972
},
{
"questionId": "q146",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 21879,
"outputTokens": 6,
"latencyMs": 4646.213583000004
},
{
"questionId": "q146",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 13169,
"outputTokens": 392,
"latencyMs": 8023.040708000073
},
{
"questionId": "q146",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"isCorrect": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1252.574666999979
},
{
"questionId": "q146",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "209624",
"actual": "0",
"isCorrect": false,
"inputTokens": 17074,
"outputTokens": 1,
"latencyMs": 9256.544125000015
},
{
"questionId": "q147",
"format": "json",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 15185,
"outputTokens": 328,
"latencyMs": 6800.243999999948
},
{
"questionId": "q147",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 17406,
"outputTokens": 6,
"latencyMs": 1856.026916999952
},
{
"questionId": "q147",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "58023",
"actual": "0",
"isCorrect": false,
"inputTokens": 19989,
"outputTokens": 1,
"latencyMs": 1783.4203330000164
},
{
"questionId": "q147",
"format": "toon",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 8786,
"outputTokens": 904,
"latencyMs": 8408.46395799995
},
{
"questionId": "q147",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 9276,
"outputTokens": 6,
"latencyMs": 1048.0284159999574
},
{
"questionId": "q147",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "58023",
"actual": "0",
"isCorrect": false,
"inputTokens": 12335,
"outputTokens": 1,
"latencyMs": 2309.89829199994
},
{
"questionId": "q147",
"format": "csv",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 8554,
"outputTokens": 456,
"latencyMs": 7778.412583000027
},
{
"questionId": "q147",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 9122,
"outputTokens": 6,
"latencyMs": 1095.3032080000266
},
{
"questionId": "q147",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 12205,
"outputTokens": 5,
"latencyMs": 2191.419332999969
},
{
"questionId": "q147",
"format": "xml",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 17136,
"outputTokens": 328,
"latencyMs": 5028.444708000054
},
{
"questionId": "q147",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 19801,
"outputTokens": 6,
"latencyMs": 1697.0504170000786
},
{
"questionId": "q147",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 21879,
"outputTokens": 5,
"latencyMs": 1800.0818329999456
},
{
"questionId": "q147",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 13169,
"outputTokens": 712,
"latencyMs": 8022.871625000029
},
{
"questionId": "q147",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 14480,
"outputTokens": 6,
"latencyMs": 1105.1744999999646
},
{
"questionId": "q147",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "58023",
"actual": "58023",
"isCorrect": true,
"inputTokens": 17074,
"outputTokens": 5,
"latencyMs": 2765.7437500000233
},
{
"questionId": "q148",
"format": "json",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 15188,
"outputTokens": 328,
"latencyMs": 4684.178457999951
},
{
"questionId": "q148",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 1856.438208000036
},
{
"questionId": "q148",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 19991,
"outputTokens": 6,
"latencyMs": 4894.268209000002
},
{
"questionId": "q148",
"format": "toon",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 1608,
"latencyMs": 19985.54383400001
},
{
"questionId": "q148",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1212.5407500000438
},
{
"questionId": "q148",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "196024",
"actual": "N/A",
"isCorrect": false,
"inputTokens": 12337,
"outputTokens": 3,
"latencyMs": 12548.686624999973
},
{
"questionId": "q148",
"format": "csv",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 8557,
"outputTokens": 2760,
"latencyMs": 20131.88070800004
},
{
"questionId": "q148",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 1217.2275000000373
},
{
"questionId": "q148",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 12207,
"outputTokens": 6,
"latencyMs": 2748.620916999993
},
{
"questionId": "q148",
"format": "xml",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 17139,
"outputTokens": 392,
"latencyMs": 6418.833957999945
},
{
"questionId": "q148",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 19802,
"outputTokens": 6,
"latencyMs": 2019.8872089999495
},
{
"questionId": "q148",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 21881,
"outputTokens": 6,
"latencyMs": 2523.128167000017
},
{
"questionId": "q148",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 13172,
"outputTokens": 584,
"latencyMs": 8212.874959000037
},
{
"questionId": "q148",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1151.26241700002
},
{
"questionId": "q148",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "196024",
"actual": "196024",
"isCorrect": true,
"inputTokens": 17076,
"outputTokens": 6,
"latencyMs": 3479.8169999999227
},
{
"questionId": "q149",
"format": "json",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 15188,
"outputTokens": 456,
"latencyMs": 6856.402957999962
},
{
"questionId": "q149",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 1727.7318750000559
},
{
"questionId": "q149",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 19991,
"outputTokens": 5,
"latencyMs": 5595.708332999959
},
{
"questionId": "q149",
"format": "toon",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 584,
"latencyMs": 5889.62179200002
},
{
"questionId": "q149",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1206.469458000036
},
{
"questionId": "q149",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 12337,
"outputTokens": 5,
"latencyMs": 2057.8787500000326
},
{
"questionId": "q149",
"format": "csv",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 8557,
"outputTokens": 584,
"latencyMs": 6905.8247499999125
},
{
"questionId": "q149",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1003.953542000032
},
{
"questionId": "q149",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "30919",
"actual": "0",
"isCorrect": false,
"inputTokens": 12207,
"outputTokens": 1,
"latencyMs": 2500.2377919999417
},
{
"questionId": "q149",
"format": "xml",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 17139,
"outputTokens": 264,
"latencyMs": 4909.18979199999
},
{
"questionId": "q149",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 19803,
"outputTokens": 6,
"latencyMs": 2457.2324580000713
},
{
"questionId": "q149",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 21881,
"outputTokens": 5,
"latencyMs": 1428.471666000085
},
{
"questionId": "q149",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 13172,
"outputTokens": 392,
"latencyMs": 5668.693708000006
},
{
"questionId": "q149",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1222.2983330000425
},
{
"questionId": "q149",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "30919",
"actual": "30919",
"isCorrect": true,
"inputTokens": 17076,
"outputTokens": 5,
"latencyMs": 3050.278290999937
},
{
"questionId": "q150",
"format": "json",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 15187,
"outputTokens": 456,
"latencyMs": 7561.326083000051
},
{
"questionId": "q150",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 2041.015417000046
},
{
"questionId": "q150",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 19989,
"outputTokens": 6,
"latencyMs": 1918.6380409999983
},
{
"questionId": "q150",
"format": "toon",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 8788,
"outputTokens": 776,
"latencyMs": 7871.997415999998
},
{
"questionId": "q150",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1578.9285829999717
},
{
"questionId": "q150",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 12335,
"outputTokens": 6,
"latencyMs": 2032.75475000008
},
{
"questionId": "q150",
"format": "csv",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "0",
"isCorrect": false,
"inputTokens": 8556,
"outputTokens": 1159,
"latencyMs": 30959.83791699994
},
{
"questionId": "q150",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1389.4868339999812
},
{
"questionId": "q150",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "192220",
"actual": "0",
"isCorrect": false,
"inputTokens": 12205,
"outputTokens": 1,
"latencyMs": 3573.9437089998974
},
{
"questionId": "q150",
"format": "xml",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 17138,
"outputTokens": 392,
"latencyMs": 6992.854374999995
},
{
"questionId": "q150",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 19800,
"outputTokens": 6,
"latencyMs": 1679.577958000009
},
{
"questionId": "q150",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 21879,
"outputTokens": 6,
"latencyMs": 1553.5702499999898
},
{
"questionId": "q150",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 13171,
"outputTokens": 328,
"latencyMs": 4169.634166999953
},
{
"questionId": "q150",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1384.3902089999756
},
{
"questionId": "q150",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "192220",
"actual": "192220",
"isCorrect": true,
"inputTokens": 17074,
"outputTokens": 6,
"latencyMs": 2953.2877919999883
},
{
"questionId": "q151",
"format": "json",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 15190,
"outputTokens": 584,
"latencyMs": 6612.153208000003
},
{
"questionId": "q151",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 17414,
"outputTokens": 6,
"latencyMs": 2259.919874999905
},
{
"questionId": "q151",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 19997,
"outputTokens": 5,
"latencyMs": 4557.873041000101
},
{
"questionId": "q151",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 8791,
"outputTokens": 712,
"latencyMs": 7556.261375000002
},
{
"questionId": "q151",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 9284,
"outputTokens": 6,
"latencyMs": 1012.9206669999985
},
{
"questionId": "q151",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 12343,
"outputTokens": 5,
"latencyMs": 6754.191916999989
},
{
"questionId": "q151",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 8559,
"outputTokens": 712,
"latencyMs": 7742.647875000024
},
{
"questionId": "q151",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 9130,
"outputTokens": 6,
"latencyMs": 1578.1971669999184
},
{
"questionId": "q151",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 12213,
"outputTokens": 5,
"latencyMs": 7366.954833999975
},
{
"questionId": "q151",
"format": "xml",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 17141,
"outputTokens": 328,
"latencyMs": 6099.567540999968
},
{
"questionId": "q151",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 19809,
"outputTokens": 6,
"latencyMs": 1278.9319580000592
},
{
"questionId": "q151",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 21887,
"outputTokens": 5,
"latencyMs": 4035.024666000041
},
{
"questionId": "q151",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 13174,
"outputTokens": 456,
"latencyMs": 4068.7430829999503
},
{
"questionId": "q151",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 14488,
"outputTokens": 6,
"latencyMs": 1183.168624999933
},
{
"questionId": "q151",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "11763",
"actual": "11763",
"isCorrect": true,
"inputTokens": 17082,
"outputTokens": 5,
"latencyMs": 1311.251791000017
},
{
"questionId": "q152",
"format": "json",
"model": "gpt-5-nano",
"expected": "100",
"actual": "114",
"isCorrect": false,
"inputTokens": 15187,
"outputTokens": 3271,
"latencyMs": 26292.3486250001
},
{
"questionId": "q152",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 17406,
"outputTokens": 5,
"latencyMs": 1269.8386670000618
},
{
"questionId": "q152",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "2",
"isCorrect": false,
"inputTokens": 19990,
"outputTokens": 1,
"latencyMs": 1418.8326250000391
},
{
"questionId": "q152",
"format": "toon",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 8788,
"outputTokens": 711,
"latencyMs": 7467.631458999938
},
{
"questionId": "q152",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 9276,
"outputTokens": 5,
"latencyMs": 1310.1392090000445
},
{
"questionId": "q152",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 12336,
"outputTokens": 1,
"latencyMs": 2714.426749999984
},
{
"questionId": "q152",
"format": "csv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 8556,
"outputTokens": 903,
"latencyMs": 10460.54125000001
},
{
"questionId": "q152",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 9122,
"outputTokens": 5,
"latencyMs": 1165.5718329999363
},
{
"questionId": "q152",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 12206,
"outputTokens": 1,
"latencyMs": 6584.999583999976
},
{
"questionId": "q152",
"format": "xml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 17138,
"outputTokens": 519,
"latencyMs": 7805.630750000011
},
{
"questionId": "q152",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 19801,
"outputTokens": 5,
"latencyMs": 1370.0252500000643
},
{
"questionId": "q152",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 21880,
"outputTokens": 1,
"latencyMs": 1457.9777079999913
},
{
"questionId": "q152",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 13171,
"outputTokens": 2055,
"latencyMs": 73627.54529200005
},
{
"questionId": "q152",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"isCorrect": false,
"inputTokens": 14480,
"outputTokens": 5,
"latencyMs": 1786.1586249999236
},
{
"questionId": "q152",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "2",
"isCorrect": false,
"inputTokens": 17075,
"outputTokens": 1,
"latencyMs": 19150.725124999997
},
{
"questionId": "q153",
"format": "json",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "13886916",
"isCorrect": false,
"inputTokens": 15188,
"outputTokens": 5833,
"latencyMs": 354484.18529200007
},
{
"questionId": "q153",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"isCorrect": false,
"inputTokens": 17407,
"outputTokens": 9,
"latencyMs": 1871.1713750000345
},
{
"questionId": "q153",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "15404143",
"actual": "12990000",
"isCorrect": false,
"inputTokens": 19991,
"outputTokens": 8,
"latencyMs": 155538.94058299996
},
{
"questionId": "q153",
"format": "toon",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "15404143",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 5577,
"latencyMs": 46411.59825000004
},
{
"questionId": "q153",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"isCorrect": false,
"inputTokens": 9277,
"outputTokens": 9,
"latencyMs": 1184.7457910000812
},
{
"questionId": "q153",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "15404143",
"actual": "14371343",
"isCorrect": false,
"inputTokens": 12337,
"outputTokens": 8,
"latencyMs": 27093.977375000017
},
{
"questionId": "q153",
"format": "csv",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "15404143",
"isCorrect": true,
"inputTokens": 8557,
"outputTokens": 5321,
"latencyMs": 40838.23450000002
},
{
"questionId": "q153",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "15,847,892",
"isCorrect": false,
"inputTokens": 9123,
"outputTokens": 9,
"latencyMs": 1243.0417080000043
},
{
"questionId": "q153",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "15404143",
"actual": "10000000",
"isCorrect": false,
"inputTokens": 12207,
"outputTokens": 8,
"latencyMs": 1697.566125000012
},
{
"questionId": "q153",
"format": "xml",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "11887802",
"isCorrect": false,
"inputTokens": 17139,
"outputTokens": 3465,
"latencyMs": 35017.48091599997
},
{
"questionId": "q153",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "10,847,892",
"isCorrect": false,
"inputTokens": 19802,
"outputTokens": 9,
"latencyMs": 1783.1710419999436
},
{
"questionId": "q153",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "15404143",
"actual": "14000000",
"isCorrect": false,
"inputTokens": 21881,
"outputTokens": 8,
"latencyMs": 20208.78741599992
},
{
"questionId": "q153",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "14012139",
"isCorrect": false,
"inputTokens": 13172,
"outputTokens": 14601,
"latencyMs": 139937.6586659999
},
{
"questionId": "q153",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"isCorrect": false,
"inputTokens": 14481,
"outputTokens": 9,
"latencyMs": 1949.8563330000034
},
{
"questionId": "q153",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "15404143",
"actual": "10999999",
"isCorrect": false,
"inputTokens": 17076,
"outputTokens": 8,
"latencyMs": 1061.2076249999227
},
{
"questionId": "q154",
"format": "json",
"model": "gpt-5-nano",
"expected": "100",
"actual": "86",
"isCorrect": false,
"inputTokens": 15188,
"outputTokens": 3591,
"latencyMs": 186054.49916699994
},
{
"questionId": "q154",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 1541.018458000035
},
{
"questionId": "q154",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "59",
"isCorrect": false,
"inputTokens": 19994,
"outputTokens": 2,
"latencyMs": 1209.527832999942
},
{
"questionId": "q154",
"format": "toon",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 2311,
"latencyMs": 20000.66104200005
},
{
"questionId": "q154",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1125.2787499999395
},
{
"questionId": "q154",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "50",
"isCorrect": false,
"inputTokens": 12340,
"outputTokens": 2,
"latencyMs": 2061.19062499993
},
{
"questionId": "q154",
"format": "csv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 8557,
"outputTokens": 3271,
"latencyMs": 29091.357792000053
},
{
"questionId": "q154",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1029.3966670000227
},
{
"questionId": "q154",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "59",
"isCorrect": false,
"inputTokens": 12210,
"outputTokens": 2,
"latencyMs": 2304.6412080000155
},
{
"questionId": "q154",
"format": "xml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "88",
"isCorrect": false,
"inputTokens": 17139,
"outputTokens": 2375,
"latencyMs": 25588.054458
},
{
"questionId": "q154",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 19803,
"outputTokens": 5,
"latencyMs": 1378.1570839999476
},
{
"questionId": "q154",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 21884,
"outputTokens": 3,
"latencyMs": 28098.016750000068
},
{
"questionId": "q154",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "88",
"isCorrect": false,
"inputTokens": 13172,
"outputTokens": 4359,
"latencyMs": 47106.68116699997
},
{
"questionId": "q154",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 2077.1985829999903
},
{
"questionId": "q154",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "50",
"isCorrect": false,
"inputTokens": 17079,
"outputTokens": 2,
"latencyMs": 1049.9515410000458
},
{
"questionId": "q155",
"format": "json",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 15188,
"outputTokens": 5639,
"latencyMs": 52034.31104199996
},
{
"questionId": "q155",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"isCorrect": false,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 1774.2209169999696
},
{
"questionId": "q155",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "60",
"isCorrect": false,
"inputTokens": 19994,
"outputTokens": 2,
"latencyMs": 1397.8998329999158
},
{
"questionId": "q155",
"format": "toon",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 2823,
"latencyMs": 26509.484792000032
},
{
"questionId": "q155",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "42",
"isCorrect": false,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1028.7182500000345
},
{
"questionId": "q155",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 12340,
"outputTokens": 3,
"latencyMs": 21919.32149999996
},
{
"questionId": "q155",
"format": "csv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 8557,
"outputTokens": 2631,
"latencyMs": 32920.081041999976
},
{
"questionId": "q155",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "47",
"isCorrect": false,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1246.9641250000568
},
{
"questionId": "q155",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 12210,
"outputTokens": 3,
"latencyMs": 17704.908124999958
},
{
"questionId": "q155",
"format": "xml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "79",
"isCorrect": false,
"inputTokens": 17139,
"outputTokens": 4359,
"latencyMs": 36706.952500000014
},
{
"questionId": "q155",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "50",
"isCorrect": false,
"inputTokens": 19803,
"outputTokens": 5,
"latencyMs": 1653.922874999931
},
{
"questionId": "q155",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 21884,
"outputTokens": 3,
"latencyMs": 18907.825375000015
},
{
"questionId": "q155",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "88",
"isCorrect": false,
"inputTokens": 13172,
"outputTokens": 2567,
"latencyMs": 29826.266333999927
},
{
"questionId": "q155",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"isCorrect": false,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 1877.8078329999698
},
{
"questionId": "q155",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "60",
"isCorrect": false,
"inputTokens": 17079,
"outputTokens": 2,
"latencyMs": 1709.5576250000158
},
{
"questionId": "q156",
"format": "json",
"model": "gpt-5-nano",
"expected": "76",
"actual": "61",
"isCorrect": false,
"inputTokens": 15188,
"outputTokens": 3015,
"latencyMs": 27373.73904200003
},
{
"questionId": "q156",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"isCorrect": false,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 2553.873874999932
},
{
"questionId": "q156",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "76",
"actual": "50",
"isCorrect": false,
"inputTokens": 19995,
"outputTokens": 2,
"latencyMs": 1292.7788750000764
},
{
"questionId": "q156",
"format": "toon",
"model": "gpt-5-nano",
"expected": "76",
"actual": "76",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 3911,
"latencyMs": 38466.93025000009
},
{
"questionId": "q156",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"isCorrect": false,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1207.3981249999488
},
{
"questionId": "q156",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "76",
"actual": "76",
"isCorrect": true,
"inputTokens": 12341,
"outputTokens": 2,
"latencyMs": 21904.33095799992
},
{
"questionId": "q156",
"format": "csv",
"model": "gpt-5-nano",
"expected": "76",
"actual": "75",
"isCorrect": false,
"inputTokens": 8557,
"outputTokens": 2951,
"latencyMs": 38943.062832999974
},
{
"questionId": "q156",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"isCorrect": false,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1096.0891670000274
},
{
"questionId": "q156",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "76",
"actual": "76",
"isCorrect": true,
"inputTokens": 12211,
"outputTokens": 2,
"latencyMs": 16468.647499999963
},
{
"questionId": "q156",
"format": "xml",
"model": "gpt-5-nano",
"expected": "76",
"actual": "64",
"isCorrect": false,
"inputTokens": 17139,
"outputTokens": 1863,
"latencyMs": 18473.753917000024
},
{
"questionId": "q156",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"isCorrect": false,
"inputTokens": 19803,
"outputTokens": 5,
"latencyMs": 1316.2989590000361
},
{
"questionId": "q156",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "76",
"actual": "47",
"isCorrect": false,
"inputTokens": 21885,
"outputTokens": 2,
"latencyMs": 1786.060832999996
},
{
"questionId": "q156",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "76",
"actual": "72",
"isCorrect": false,
"inputTokens": 13172,
"outputTokens": 8711,
"latencyMs": 86456.99716699996
},
{
"questionId": "q156",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"isCorrect": false,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 1337.9467500000028
},
{
"questionId": "q156",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "76",
"actual": "42",
"isCorrect": false,
"inputTokens": 17080,
"outputTokens": 2,
"latencyMs": 1272.1261659999145
},
{
"questionId": "q157",
"format": "json",
"model": "gpt-5-nano",
"expected": "100",
"actual": "139",
"isCorrect": false,
"inputTokens": 15188,
"outputTokens": 8199,
"latencyMs": 117751.80679199996
},
{
"questionId": "q157",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "89",
"isCorrect": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 6994.20404099999
},
{
"questionId": "q157",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "60",
"isCorrect": false,
"inputTokens": 19993,
"outputTokens": 2,
"latencyMs": 1664.0891249999404
},
{
"questionId": "q157",
"format": "toon",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 4103,
"latencyMs": 33535.55912499991
},
{
"questionId": "q157",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "73",
"isCorrect": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1228.1867499999935
},
{
"questionId": "q157",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "60",
"isCorrect": false,
"inputTokens": 12339,
"outputTokens": 2,
"latencyMs": 1517.6247079999885
},
{
"questionId": "q157",
"format": "csv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "87",
"isCorrect": false,
"inputTokens": 8557,
"outputTokens": 3079,
"latencyMs": 27126.57024999999
},
{
"questionId": "q157",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "89",
"isCorrect": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 949.5018749999581
},
{
"questionId": "q157",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "60",
"isCorrect": false,
"inputTokens": 12209,
"outputTokens": 2,
"latencyMs": 2366.7855419999687
},
{
"questionId": "q157",
"format": "xml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "69",
"isCorrect": false,
"inputTokens": 17139,
"outputTokens": 2183,
"latencyMs": 35555.629874999984
},
{
"questionId": "q157",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"isCorrect": false,
"inputTokens": 19804,
"outputTokens": 5,
"latencyMs": 1865.6005420000292
},
{
"questionId": "q157",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 21883,
"outputTokens": 3,
"latencyMs": 22966.85654200008
},
{
"questionId": "q157",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"isCorrect": true,
"inputTokens": 13172,
"outputTokens": 2503,
"latencyMs": 23299.811666000052
},
{
"questionId": "q157",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "95",
"isCorrect": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 1111.9951249998994
},
{
"questionId": "q157",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "100",
"actual": "50",
"isCorrect": false,
"inputTokens": 17078,
"outputTokens": 2,
"latencyMs": 1229.8220420000143
},
{
"questionId": "q158",
"format": "json",
"model": "gpt-5-nano",
"expected": "95",
"actual": "60",
"isCorrect": false,
"inputTokens": 15188,
"outputTokens": 2439,
"latencyMs": 23952.90112500009
},
{
"questionId": "q158",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"isCorrect": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 2635.0509999999776
},
{
"questionId": "q158",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "95",
"actual": "59",
"isCorrect": false,
"inputTokens": 19993,
"outputTokens": 2,
"latencyMs": 1382.6497909999453
},
{
"questionId": "q158",
"format": "toon",
"model": "gpt-5-nano",
"expected": "95",
"actual": "95",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 5255,
"latencyMs": 52427.638499999885
},
{
"questionId": "q158",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"isCorrect": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1752.1665410000132
},
{
"questionId": "q158",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "95",
"actual": "95",
"isCorrect": true,
"inputTokens": 12339,
"outputTokens": 2,
"latencyMs": 30665.240666999947
},
{
"questionId": "q158",
"format": "csv",
"model": "gpt-5-nano",
"expected": "95",
"actual": "96",
"isCorrect": false,
"inputTokens": 8557,
"outputTokens": 4999,
"latencyMs": 52545.94787500007
},
{
"questionId": "q158",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"isCorrect": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 1330.860624999972
},
{
"questionId": "q158",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "95",
"actual": "60",
"isCorrect": false,
"inputTokens": 12209,
"outputTokens": 2,
"latencyMs": 2559.635125000146
},
{
"questionId": "q158",
"format": "xml",
"model": "gpt-5-nano",
"expected": "95",
"actual": "96",
"isCorrect": false,
"inputTokens": 17139,
"outputTokens": 13447,
"latencyMs": 177292.60950000002
},
{
"questionId": "q158",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "32",
"isCorrect": false,
"inputTokens": 19804,
"outputTokens": 5,
"latencyMs": 1816.5423749999609
},
{
"questionId": "q158",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "95",
"actual": "60",
"isCorrect": false,
"inputTokens": 21883,
"outputTokens": 2,
"latencyMs": 3004.8347500001546
},
{
"questionId": "q158",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "95",
"actual": "96",
"isCorrect": false,
"inputTokens": 13172,
"outputTokens": 3975,
"latencyMs": 42573.26512499992
},
{
"questionId": "q158",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "47",
"isCorrect": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 1499.2267080000602
},
{
"questionId": "q158",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "95",
"actual": "60",
"isCorrect": false,
"inputTokens": 17078,
"outputTokens": 2,
"latencyMs": 1173.8084579999559
},
{
"questionId": "q159",
"format": "json",
"model": "gpt-5-nano",
"expected": "83",
"actual": "50",
"isCorrect": false,
"inputTokens": 15188,
"outputTokens": 11719,
"latencyMs": 109516.51062500011
},
{
"questionId": "q159",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "71",
"isCorrect": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 1886.0561250001192
},
{
"questionId": "q159",
"format": "json",
"model": "gemini-2.5-flash",
"expected": "83",
"actual": "59",
"isCorrect": false,
"inputTokens": 19994,
"outputTokens": 2,
"latencyMs": 2211.6038330001757
},
{
"questionId": "q159",
"format": "toon",
"model": "gpt-5-nano",
"expected": "83",
"actual": "83",
"isCorrect": true,
"inputTokens": 8789,
"outputTokens": 3463,
"latencyMs": 36709.80866700015
},
{
"questionId": "q159",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "73",
"isCorrect": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1961.9631250000093
},
{
"questionId": "q159",
"format": "toon",
"model": "gemini-2.5-flash",
"expected": "83",
"actual": "83",
"isCorrect": true,
"inputTokens": 12340,
"outputTokens": 2,
"latencyMs": 18972.830374999903
},
{
"questionId": "q159",
"format": "csv",
"model": "gpt-5-nano",
"expected": "83",
"actual": "83",
"isCorrect": true,
"inputTokens": 8557,
"outputTokens": 6919,
"latencyMs": 69083.2129579999
},
{
"questionId": "q159",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "73",
"isCorrect": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 1200.284708000021
},
{
"questionId": "q159",
"format": "csv",
"model": "gemini-2.5-flash",
"expected": "83",
"actual": "83",
"isCorrect": true,
"inputTokens": 12210,
"outputTokens": 2,
"latencyMs": 33046.47866699984
},
{
"questionId": "q159",
"format": "xml",
"model": "gpt-5-nano",
"expected": "83",
"actual": "112",
"isCorrect": false,
"inputTokens": 17139,
"outputTokens": 6535,
"latencyMs": 62622.555124999955
},
{
"questionId": "q159",
"format": "xml",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "47",
"isCorrect": false,
"inputTokens": 19804,
"outputTokens": 5,
"latencyMs": 1500.2770829999354
},
{
"questionId": "q159",
"format": "xml",
"model": "gemini-2.5-flash",
"expected": "83",
"actual": "49",
"isCorrect": false,
"inputTokens": 21884,
"outputTokens": 2,
"latencyMs": 2811.6203749999404
},
{
"questionId": "q159",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "83",
"actual": "90",
"isCorrect": false,
"inputTokens": 13172,
"outputTokens": 25095,
"latencyMs": 237521.54700000002
},
{
"questionId": "q159",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "71",
"isCorrect": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 1567.613791000098
},
{
"questionId": "q159",
"format": "yaml",
"model": "gemini-2.5-flash",
"expected": "83",
"actual": "49",
"isCorrect": false,
"inputTokens": 17079,
"outputTokens": 2,
"latencyMs": 1373.2515409998596
}
]