Files
toon/benchmarks/results/accuracy/raw-results.json
2025-10-27 13:45:48 +01:00

17493 lines
382 KiB
JSON

[
{
"questionId": "q1",
"format": "json",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 6390,
"outputTokens": 72,
"latencyMs": 2221.390167
},
{
"questionId": "q1",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1276.715333
},
{
"questionId": "q1",
"format": "toon",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 2527,
"outputTokens": 72,
"latencyMs": 3718.250833
},
{
"questionId": "q1",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1215.944708
},
{
"questionId": "q1",
"format": "csv",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 2381,
"outputTokens": 72,
"latencyMs": 2417.306625
},
{
"questionId": "q1",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1152.5258749999998
},
{
"questionId": "q1",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 6316,
"outputTokens": 72,
"latencyMs": 4603.444417
},
{
"questionId": "q1",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1390.011125
},
{
"questionId": "q1",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 5012,
"outputTokens": 8,
"latencyMs": 4339.294459
},
{
"questionId": "q1",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1374.47325
},
{
"questionId": "q2",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6390,
"outputTokens": 135,
"latencyMs": 2550.589042
},
{
"questionId": "q2",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1139.559917
},
{
"questionId": "q2",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 135,
"latencyMs": 2422.8178749999997
},
{
"questionId": "q2",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 1135.579459
},
{
"questionId": "q2",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 71,
"latencyMs": 4198.553583999999
},
{
"questionId": "q2",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 1147.9685829999999
},
{
"questionId": "q2",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6316,
"outputTokens": 71,
"latencyMs": 2594.702667
},
{
"questionId": "q2",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6364,
"outputTokens": 4,
"latencyMs": 1568.4054999999998
},
{
"questionId": "q2",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5012,
"outputTokens": 71,
"latencyMs": 2516.345875
},
{
"questionId": "q2",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1633.5375000000001
},
{
"questionId": "q3",
"format": "json",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 6392,
"outputTokens": 76,
"latencyMs": 2079.8442499999996
},
{
"questionId": "q3",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 7874,
"outputTokens": 12,
"latencyMs": 1201.556458
},
{
"questionId": "q3",
"format": "toon",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 2529,
"outputTokens": 140,
"latencyMs": 2356.408
},
{
"questionId": "q3",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 2986,
"outputTokens": 12,
"latencyMs": 1113.255166
},
{
"questionId": "q3",
"format": "csv",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 2383,
"outputTokens": 140,
"latencyMs": 2188.5425419999997
},
{
"questionId": "q3",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 2860,
"outputTokens": 12,
"latencyMs": 1029.9496669999999
},
{
"questionId": "q3",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 6318,
"outputTokens": 140,
"latencyMs": 2605.8857080000002
},
{
"questionId": "q3",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 6369,
"outputTokens": 12,
"latencyMs": 1273.5997920000004
},
{
"questionId": "q3",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 5014,
"outputTokens": 140,
"latencyMs": 2530.4294580000005
},
{
"questionId": "q3",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 5764,
"outputTokens": 12,
"latencyMs": 1404.4837089999996
},
{
"questionId": "q4",
"format": "json",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 6390,
"outputTokens": 72,
"latencyMs": 2302.062125
},
{
"questionId": "q4",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1114.0778329999998
},
{
"questionId": "q4",
"format": "toon",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 2527,
"outputTokens": 72,
"latencyMs": 2006.7020830000001
},
{
"questionId": "q4",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1641.5518749999997
},
{
"questionId": "q4",
"format": "csv",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 2381,
"outputTokens": 136,
"latencyMs": 2850.351709
},
{
"questionId": "q4",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1367.7319589999997
},
{
"questionId": "q4",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 6316,
"outputTokens": 72,
"latencyMs": 2477.8365839999997
},
{
"questionId": "q4",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1309.567083
},
{
"questionId": "q4",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 5012,
"outputTokens": 72,
"latencyMs": 1794.2651250000008
},
{
"questionId": "q4",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1177.5377079999998
},
{
"questionId": "q5",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6389,
"outputTokens": 71,
"latencyMs": 1963.9477500000003
},
{
"questionId": "q5",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1024.5166669999999
},
{
"questionId": "q5",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2526,
"outputTokens": 135,
"latencyMs": 2291.4288749999996
},
{
"questionId": "q5",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1312.7111250000007
},
{
"questionId": "q5",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2380,
"outputTokens": 135,
"latencyMs": 1727.6371660000004
},
{
"questionId": "q5",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1097.0443749999995
},
{
"questionId": "q5",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6315,
"outputTokens": 135,
"latencyMs": 2671.2276250000004
},
{
"questionId": "q5",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 1174.8639999999996
},
{
"questionId": "q5",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5011,
"outputTokens": 71,
"latencyMs": 2306.2642499999993
},
{
"questionId": "q5",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 2822.8963750000003
},
{
"questionId": "q6",
"format": "json",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 6390,
"outputTokens": 139,
"latencyMs": 2827.0400409999993
},
{
"questionId": "q6",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 7871,
"outputTokens": 11,
"latencyMs": 1151.7215829999996
},
{
"questionId": "q6",
"format": "toon",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 2527,
"outputTokens": 75,
"latencyMs": 1714.2902919999997
},
{
"questionId": "q6",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 2983,
"outputTokens": 11,
"latencyMs": 1810.6344170000011
},
{
"questionId": "q6",
"format": "csv",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 2381,
"outputTokens": 75,
"latencyMs": 2548.0390000000007
},
{
"questionId": "q6",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 2857,
"outputTokens": 11,
"latencyMs": 1046.7650829999993
},
{
"questionId": "q6",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 6316,
"outputTokens": 139,
"latencyMs": 2408.879916000001
},
{
"questionId": "q6",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 6366,
"outputTokens": 11,
"latencyMs": 1186.5773750000008
},
{
"questionId": "q6",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 5012,
"outputTokens": 139,
"latencyMs": 3157.9398329999995
},
{
"questionId": "q6",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 5761,
"outputTokens": 11,
"latencyMs": 1129.6754170000004
},
{
"questionId": "q7",
"format": "json",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 6390,
"outputTokens": 72,
"latencyMs": 2893.3476250000003
},
{
"questionId": "q7",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1288.7682919999988
},
{
"questionId": "q7",
"format": "toon",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 2527,
"outputTokens": 72,
"latencyMs": 2324.6738330000007
},
{
"questionId": "q7",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1095.704291
},
{
"questionId": "q7",
"format": "csv",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 2381,
"outputTokens": 136,
"latencyMs": 3980.3727500000005
},
{
"questionId": "q7",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1122.8730419999993
},
{
"questionId": "q7",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 6316,
"outputTokens": 72,
"latencyMs": 2030.0818330000002
},
{
"questionId": "q7",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1705.6364999999987
},
{
"questionId": "q7",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 5012,
"outputTokens": 72,
"latencyMs": 1611.3567500000008
},
{
"questionId": "q7",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1109.0094590000008
},
{
"questionId": "q8",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6390,
"outputTokens": 199,
"latencyMs": 3099.078125
},
{
"questionId": "q8",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7871,
"outputTokens": 4,
"latencyMs": 1115.9911250000005
},
{
"questionId": "q8",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 135,
"latencyMs": 2833.193875000001
},
{
"questionId": "q8",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2983,
"outputTokens": 4,
"latencyMs": 933.1444169999995
},
{
"questionId": "q8",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 199,
"latencyMs": 2315.536
},
{
"questionId": "q8",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2857,
"outputTokens": 4,
"latencyMs": 1300.336792
},
{
"questionId": "q8",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6316,
"outputTokens": 135,
"latencyMs": 7016.997917000002
},
{
"questionId": "q8",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6366,
"outputTokens": 4,
"latencyMs": 1288.107333
},
{
"questionId": "q8",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5012,
"outputTokens": 135,
"latencyMs": 2474.8247499999998
},
{
"questionId": "q8",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5761,
"outputTokens": 4,
"latencyMs": 1027.9775420000005
},
{
"questionId": "q9",
"format": "json",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 6392,
"outputTokens": 652,
"latencyMs": 8322.172416
},
{
"questionId": "q9",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 7871,
"outputTokens": 11,
"latencyMs": 1066.3422090000004
},
{
"questionId": "q9",
"format": "toon",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 2529,
"outputTokens": 76,
"latencyMs": 2245.5604999999996
},
{
"questionId": "q9",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 2983,
"outputTokens": 11,
"latencyMs": 1179.7512079999997
},
{
"questionId": "q9",
"format": "csv",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 2383,
"outputTokens": 204,
"latencyMs": 2584.0723340000004
},
{
"questionId": "q9",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 2857,
"outputTokens": 11,
"latencyMs": 1204.6979589999992
},
{
"questionId": "q9",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 6318,
"outputTokens": 396,
"latencyMs": 3824.918375000001
},
{
"questionId": "q9",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 6366,
"outputTokens": 11,
"latencyMs": 1492.6765830000004
},
{
"questionId": "q9",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 5014,
"outputTokens": 76,
"latencyMs": 1834.562
},
{
"questionId": "q9",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 5761,
"outputTokens": 11,
"latencyMs": 1245.0000419999997
},
{
"questionId": "q10",
"format": "json",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 6391,
"outputTokens": 136,
"latencyMs": 2337.0652499999997
},
{
"questionId": "q10",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1148.1971250000006
},
{
"questionId": "q10",
"format": "toon",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 2528,
"outputTokens": 72,
"latencyMs": 2736.2375420000008
},
{
"questionId": "q10",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1164.4291250000006
},
{
"questionId": "q10",
"format": "csv",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 2382,
"outputTokens": 72,
"latencyMs": 2479.8535840000004
},
{
"questionId": "q10",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1032.3198329999996
},
{
"questionId": "q10",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 6317,
"outputTokens": 136,
"latencyMs": 2237.465583000001
},
{
"questionId": "q10",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1254.3189160000002
},
{
"questionId": "q10",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 5013,
"outputTokens": 72,
"latencyMs": 3753.917125
},
{
"questionId": "q10",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1154.7003750000003
},
{
"questionId": "q11",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6390,
"outputTokens": 135,
"latencyMs": 2621.2275420000005
},
{
"questionId": "q11",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1222.843499999999
},
{
"questionId": "q11",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 71,
"latencyMs": 1762.1339159999989
},
{
"questionId": "q11",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 1630.7307079999991
},
{
"questionId": "q11",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 71,
"latencyMs": 1848.9775829999999
},
{
"questionId": "q11",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 1080.8682500000014
},
{
"questionId": "q11",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6316,
"outputTokens": 135,
"latencyMs": 26303.357959
},
{
"questionId": "q11",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6364,
"outputTokens": 4,
"latencyMs": 1354.007999999998
},
{
"questionId": "q11",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5012,
"outputTokens": 71,
"latencyMs": 1924.4625829999986
},
{
"questionId": "q11",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1279.5235830000001
},
{
"questionId": "q12",
"format": "json",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 6389,
"outputTokens": 330,
"latencyMs": 3997.3972079999985
},
{
"questionId": "q12",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 7867,
"outputTokens": 9,
"latencyMs": 1153.9412079999984
},
{
"questionId": "q12",
"format": "toon",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 2526,
"outputTokens": 138,
"latencyMs": 2494.580582999999
},
{
"questionId": "q12",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 2979,
"outputTokens": 9,
"latencyMs": 1350.1353750000017
},
{
"questionId": "q12",
"format": "csv",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 2380,
"outputTokens": 138,
"latencyMs": 3024.4009160000023
},
{
"questionId": "q12",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 2853,
"outputTokens": 9,
"latencyMs": 1199.3955830000014
},
{
"questionId": "q12",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 6315,
"outputTokens": 138,
"latencyMs": 5168.116582999999
},
{
"questionId": "q12",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 6362,
"outputTokens": 9,
"latencyMs": 1198.3554160000022
},
{
"questionId": "q12",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 5011,
"outputTokens": 74,
"latencyMs": 2632.998958999997
},
{
"questionId": "q12",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 5757,
"outputTokens": 9,
"latencyMs": 1124.5625419999997
},
{
"questionId": "q13",
"format": "json",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 6388,
"outputTokens": 72,
"latencyMs": 2357.2276249999995
},
{
"questionId": "q13",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 7868,
"outputTokens": 6,
"latencyMs": 1267.960791999998
},
{
"questionId": "q13",
"format": "toon",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 2525,
"outputTokens": 136,
"latencyMs": 2397.798125000001
},
{
"questionId": "q13",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 2980,
"outputTokens": 6,
"latencyMs": 1170.6429580000004
},
{
"questionId": "q13",
"format": "csv",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 2379,
"outputTokens": 136,
"latencyMs": 3227.198124999999
},
{
"questionId": "q13",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 2854,
"outputTokens": 6,
"latencyMs": 1112.6066250000003
},
{
"questionId": "q13",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 6314,
"outputTokens": 72,
"latencyMs": 2036.251791999999
},
{
"questionId": "q13",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 6363,
"outputTokens": 6,
"latencyMs": 1290.7641250000015
},
{
"questionId": "q13",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 5010,
"outputTokens": 72,
"latencyMs": 2262.8405840000014
},
{
"questionId": "q13",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 5758,
"outputTokens": 6,
"latencyMs": 1193.2695419999982
},
{
"questionId": "q14",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6389,
"outputTokens": 71,
"latencyMs": 3198.2654159999984
},
{
"questionId": "q14",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1229.8644999999997
},
{
"questionId": "q14",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2526,
"outputTokens": 71,
"latencyMs": 3293.710084000002
},
{
"questionId": "q14",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1121.200334000001
},
{
"questionId": "q14",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2380,
"outputTokens": 71,
"latencyMs": 2497.4451249999984
},
{
"questionId": "q14",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1152.0107500000013
},
{
"questionId": "q14",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6315,
"outputTokens": 71,
"latencyMs": 3547.6399999999994
},
{
"questionId": "q14",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 2007.6731249999975
},
{
"questionId": "q14",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5011,
"outputTokens": 71,
"latencyMs": 7054.295208
},
{
"questionId": "q14",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1230.5032920000012
},
{
"questionId": "q15",
"format": "json",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 6390,
"outputTokens": 76,
"latencyMs": 2049.933416
},
{
"questionId": "q15",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 9,
"latencyMs": 1217.1906249999993
},
{
"questionId": "q15",
"format": "toon",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 2527,
"outputTokens": 204,
"latencyMs": 2844.136208
},
{
"questionId": "q15",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 9,
"latencyMs": 2166.8829589999987
},
{
"questionId": "q15",
"format": "csv",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 2381,
"outputTokens": 204,
"latencyMs": 2726.5934579999994
},
{
"questionId": "q15",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 9,
"latencyMs": 1107.4675410000018
},
{
"questionId": "q15",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 6316,
"outputTokens": 76,
"latencyMs": 2260.4548749999994
},
{
"questionId": "q15",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 9,
"latencyMs": 1257.2797080000018
},
{
"questionId": "q15",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 5012,
"outputTokens": 140,
"latencyMs": 2565.571791999999
},
{
"questionId": "q15",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 9,
"latencyMs": 1255.2880829999995
},
{
"questionId": "q16",
"format": "json",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 6389,
"outputTokens": 136,
"latencyMs": 2595.422042000002
},
{
"questionId": "q16",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1090.4299170000013
},
{
"questionId": "q16",
"format": "toon",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 2526,
"outputTokens": 72,
"latencyMs": 2985.3881250000013
},
{
"questionId": "q16",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1521.227415999998
},
{
"questionId": "q16",
"format": "csv",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 2380,
"outputTokens": 72,
"latencyMs": 2918.142082999999
},
{
"questionId": "q16",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1049.085916
},
{
"questionId": "q16",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 6315,
"outputTokens": 136,
"latencyMs": 2414.9711669999997
},
{
"questionId": "q16",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1178.0064170000005
},
{
"questionId": "q16",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 5011,
"outputTokens": 72,
"latencyMs": 1772.788625000001
},
{
"questionId": "q16",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1134.7022499999985
},
{
"questionId": "q17",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6392,
"outputTokens": 135,
"latencyMs": 2528.6098330000023
},
{
"questionId": "q17",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7872,
"outputTokens": 4,
"latencyMs": 1353.3026250000003
},
{
"questionId": "q17",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2529,
"outputTokens": 71,
"latencyMs": 2286.120999999999
},
{
"questionId": "q17",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2984,
"outputTokens": 4,
"latencyMs": 961.078292000002
},
{
"questionId": "q17",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2383,
"outputTokens": 71,
"latencyMs": 3445.204249999999
},
{
"questionId": "q17",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2858,
"outputTokens": 4,
"latencyMs": 1003.445125000002
},
{
"questionId": "q17",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6318,
"outputTokens": 135,
"latencyMs": 2696.166874999999
},
{
"questionId": "q17",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6367,
"outputTokens": 4,
"latencyMs": 1063.340791999999
},
{
"questionId": "q17",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5014,
"outputTokens": 135,
"latencyMs": 3367.6109579999975
},
{
"questionId": "q17",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5762,
"outputTokens": 4,
"latencyMs": 1322.4013339999983
},
{
"questionId": "q18",
"format": "json",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 6390,
"outputTokens": 139,
"latencyMs": 2745.6627499999995
},
{
"questionId": "q18",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 7871,
"outputTokens": 10,
"latencyMs": 1312.9286670000001
},
{
"questionId": "q18",
"format": "toon",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 2527,
"outputTokens": 1483,
"latencyMs": 13678.859999999997
},
{
"questionId": "q18",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 2983,
"outputTokens": 10,
"latencyMs": 1030.3843339999985
},
{
"questionId": "q18",
"format": "csv",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 2381,
"outputTokens": 139,
"latencyMs": 2223.2737909999996
},
{
"questionId": "q18",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 2857,
"outputTokens": 10,
"latencyMs": 1224.2647080000024
},
{
"questionId": "q18",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 6316,
"outputTokens": 139,
"latencyMs": 3198.8672499999993
},
{
"questionId": "q18",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 6366,
"outputTokens": 10,
"latencyMs": 1234.557084
},
{
"questionId": "q18",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 5012,
"outputTokens": 139,
"latencyMs": 2861.692708999999
},
{
"questionId": "q18",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 5761,
"outputTokens": 10,
"latencyMs": 1284.2591250000005
},
{
"questionId": "q19",
"format": "json",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 6390,
"outputTokens": 136,
"latencyMs": 2741.803499999998
},
{
"questionId": "q19",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 7872,
"outputTokens": 6,
"latencyMs": 1096.6906249999993
},
{
"questionId": "q19",
"format": "toon",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 2527,
"outputTokens": 136,
"latencyMs": 3692.904416999998
},
{
"questionId": "q19",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 2984,
"outputTokens": 6,
"latencyMs": 1516.7794159999976
},
{
"questionId": "q19",
"format": "csv",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 2381,
"outputTokens": 392,
"latencyMs": 5068.4152909999975
},
{
"questionId": "q19",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 2858,
"outputTokens": 6,
"latencyMs": 1356.2728330000027
},
{
"questionId": "q19",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 6316,
"outputTokens": 136,
"latencyMs": 2866.8642500000024
},
{
"questionId": "q19",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 6367,
"outputTokens": 6,
"latencyMs": 1462.041624999998
},
{
"questionId": "q19",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 5012,
"outputTokens": 72,
"latencyMs": 2320.320083999999
},
{
"questionId": "q19",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 5762,
"outputTokens": 6,
"latencyMs": 1082.976666999999
},
{
"questionId": "q20",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6389,
"outputTokens": 7,
"latencyMs": 2427.6330409999973
},
{
"questionId": "q20",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1108.7309170000008
},
{
"questionId": "q20",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2526,
"outputTokens": 71,
"latencyMs": 4405.948458000003
},
{
"questionId": "q20",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1235.6647919999996
},
{
"questionId": "q20",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2380,
"outputTokens": 71,
"latencyMs": 2528.553082999999
},
{
"questionId": "q20",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 974.1328329999997
},
{
"questionId": "q20",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6315,
"outputTokens": 135,
"latencyMs": 2243.1775420000013
},
{
"questionId": "q20",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 2416.867124999997
},
{
"questionId": "q20",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5011,
"outputTokens": 135,
"latencyMs": 2429.5548750000016
},
{
"questionId": "q20",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1257.326083
},
{
"questionId": "q21",
"format": "json",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 6393,
"outputTokens": 203,
"latencyMs": 4366.677041999996
},
{
"questionId": "q21",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 7876,
"outputTokens": 9,
"latencyMs": 1410.3295419999995
},
{
"questionId": "q21",
"format": "toon",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 2530,
"outputTokens": 75,
"latencyMs": 2834.2883330000004
},
{
"questionId": "q21",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 2988,
"outputTokens": 9,
"latencyMs": 1023.437750000001
},
{
"questionId": "q21",
"format": "csv",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 2384,
"outputTokens": 139,
"latencyMs": 3091.7722909999975
},
{
"questionId": "q21",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 2862,
"outputTokens": 9,
"latencyMs": 1910.5562920000011
},
{
"questionId": "q21",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 6319,
"outputTokens": 75,
"latencyMs": 2335.239207999999
},
{
"questionId": "q21",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 6371,
"outputTokens": 9,
"latencyMs": 1145.7144169999992
},
{
"questionId": "q21",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 5015,
"outputTokens": 75,
"latencyMs": 2204.0944169999966
},
{
"questionId": "q21",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 5766,
"outputTokens": 9,
"latencyMs": 1102.2122499999969
},
{
"questionId": "q22",
"format": "json",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 6391,
"outputTokens": 200,
"latencyMs": 3785.0480830000015
},
{
"questionId": "q22",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1147.6056669999962
},
{
"questionId": "q22",
"format": "toon",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 2528,
"outputTokens": 72,
"latencyMs": 3996.1190410000054
},
{
"questionId": "q22",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1101.5621670000037
},
{
"questionId": "q22",
"format": "csv",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 2382,
"outputTokens": 136,
"latencyMs": 2563.2732499999984
},
{
"questionId": "q22",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1224.5424589999966
},
{
"questionId": "q22",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 6317,
"outputTokens": 136,
"latencyMs": 2436.8848329999964
},
{
"questionId": "q22",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 6366,
"outputTokens": 6,
"latencyMs": 1500.1066250000003
},
{
"questionId": "q22",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 5013,
"outputTokens": 72,
"latencyMs": 2529.925833000001
},
{
"questionId": "q22",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1701.0276660000018
},
{
"questionId": "q23",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6388,
"outputTokens": 135,
"latencyMs": 3078.5496249999997
},
{
"questionId": "q23",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1224.1848329999993
},
{
"questionId": "q23",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2525,
"outputTokens": 71,
"latencyMs": 2287.0156669999997
},
{
"questionId": "q23",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1209.1454999999987
},
{
"questionId": "q23",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2379,
"outputTokens": 71,
"latencyMs": 2059.012499999997
},
{
"questionId": "q23",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1393.596375000001
},
{
"questionId": "q23",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6314,
"outputTokens": 71,
"latencyMs": 1858.8989159999983
},
{
"questionId": "q23",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 1193.9375419999997
},
{
"questionId": "q23",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5010,
"outputTokens": 135,
"latencyMs": 2755.0157499999987
},
{
"questionId": "q23",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1366.030666999999
},
{
"questionId": "q24",
"format": "json",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 6390,
"outputTokens": 395,
"latencyMs": 4352.137999999999
},
{
"questionId": "q24",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 10,
"latencyMs": 1093.9707500000004
},
{
"questionId": "q24",
"format": "toon",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 2527,
"outputTokens": 139,
"latencyMs": 2481.934500000003
},
{
"questionId": "q24",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 10,
"latencyMs": 1262.3894579999978
},
{
"questionId": "q24",
"format": "csv",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 2381,
"outputTokens": 75,
"latencyMs": 2360.7159170000014
},
{
"questionId": "q24",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 10,
"latencyMs": 1462.5894999999946
},
{
"questionId": "q24",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 6316,
"outputTokens": 75,
"latencyMs": 3247.478041000002
},
{
"questionId": "q24",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 10,
"latencyMs": 1693.1597089999996
},
{
"questionId": "q24",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 5012,
"outputTokens": 75,
"latencyMs": 1726.2765839999993
},
{
"questionId": "q24",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 10,
"latencyMs": 1605.044458000004
},
{
"questionId": "q25",
"format": "json",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 6391,
"outputTokens": 136,
"latencyMs": 2263.1207090000025
},
{
"questionId": "q25",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 7873,
"outputTokens": 6,
"latencyMs": 3789.016875000001
},
{
"questionId": "q25",
"format": "toon",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 2528,
"outputTokens": 72,
"latencyMs": 1829.9641669999983
},
{
"questionId": "q25",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 2985,
"outputTokens": 6,
"latencyMs": 989.6153750000012
},
{
"questionId": "q25",
"format": "csv",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 2382,
"outputTokens": 72,
"latencyMs": 2717.4773339999956
},
{
"questionId": "q25",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 2859,
"outputTokens": 6,
"latencyMs": 1717.8889999999956
},
{
"questionId": "q25",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "46730",
"correct": false,
"inputTokens": 6317,
"outputTokens": 72,
"latencyMs": 5490.572667
},
{
"questionId": "q25",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 6368,
"outputTokens": 6,
"latencyMs": 1427.4055000000008
},
{
"questionId": "q25",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 5013,
"outputTokens": 264,
"latencyMs": 4052.875957999997
},
{
"questionId": "q25",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 5763,
"outputTokens": 6,
"latencyMs": 1586.255124999996
},
{
"questionId": "q26",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6388,
"outputTokens": 135,
"latencyMs": 3787.343541000002
},
{
"questionId": "q26",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7866,
"outputTokens": 4,
"latencyMs": 1196.934000000001
},
{
"questionId": "q26",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2525,
"outputTokens": 71,
"latencyMs": 2172.2377080000006
},
{
"questionId": "q26",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2978,
"outputTokens": 4,
"latencyMs": 1112.6987080000035
},
{
"questionId": "q26",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2379,
"outputTokens": 71,
"latencyMs": 2074.6067919999987
},
{
"questionId": "q26",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2852,
"outputTokens": 4,
"latencyMs": 1202.2165000000023
},
{
"questionId": "q26",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6314,
"outputTokens": 135,
"latencyMs": 3257.5967080000046
},
{
"questionId": "q26",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6361,
"outputTokens": 4,
"latencyMs": 1316.7435000000041
},
{
"questionId": "q26",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5010,
"outputTokens": 71,
"latencyMs": 2391.9063749999987
},
{
"questionId": "q26",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5756,
"outputTokens": 4,
"latencyMs": 1208.8820829999968
},
{
"questionId": "q27",
"format": "json",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 6391,
"outputTokens": 142,
"latencyMs": 2735.679790999995
},
{
"questionId": "q27",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 7871,
"outputTokens": 14,
"latencyMs": 1253.706624999999
},
{
"questionId": "q27",
"format": "toon",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 2528,
"outputTokens": 142,
"latencyMs": 2471.819457999998
},
{
"questionId": "q27",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 2983,
"outputTokens": 14,
"latencyMs": 1063.2195409999986
},
{
"questionId": "q27",
"format": "csv",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 2382,
"outputTokens": 142,
"latencyMs": 2061.6382500000036
},
{
"questionId": "q27",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 2857,
"outputTokens": 14,
"latencyMs": 1877.579082999997
},
{
"questionId": "q27",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 6317,
"outputTokens": 142,
"latencyMs": 3448.810375000001
},
{
"questionId": "q27",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 6366,
"outputTokens": 14,
"latencyMs": 1265.9410419999986
},
{
"questionId": "q27",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 5013,
"outputTokens": 78,
"latencyMs": 2152.5591669999994
},
{
"questionId": "q27",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 5761,
"outputTokens": 14,
"latencyMs": 1432.513583
},
{
"questionId": "q28",
"format": "json",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 6390,
"outputTokens": 136,
"latencyMs": 2707.4454169999954
},
{
"questionId": "q28",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1568.5869169999933
},
{
"questionId": "q28",
"format": "toon",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 2527,
"outputTokens": 136,
"latencyMs": 2373.4566669999986
},
{
"questionId": "q28",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1525.172749999998
},
{
"questionId": "q28",
"format": "csv",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 2381,
"outputTokens": 136,
"latencyMs": 9347.989583000002
},
{
"questionId": "q28",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1748.783334000007
},
{
"questionId": "q28",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 6316,
"outputTokens": 72,
"latencyMs": 1929.517458000002
},
{
"questionId": "q28",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 6366,
"outputTokens": 6,
"latencyMs": 1022.1345000000001
},
{
"questionId": "q28",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 5012,
"outputTokens": 136,
"latencyMs": 2102.925624999996
},
{
"questionId": "q28",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1471.7255839999998
},
{
"questionId": "q29",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6391,
"outputTokens": 71,
"latencyMs": 1983.693041999999
},
{
"questionId": "q29",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7872,
"outputTokens": 4,
"latencyMs": 1077.2119579999999
},
{
"questionId": "q29",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2528,
"outputTokens": 71,
"latencyMs": 2549.1221250000017
},
{
"questionId": "q29",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2984,
"outputTokens": 4,
"latencyMs": 921.1110840000038
},
{
"questionId": "q29",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2382,
"outputTokens": 135,
"latencyMs": 4070.615666999998
},
{
"questionId": "q29",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2858,
"outputTokens": 4,
"latencyMs": 974.754832999999
},
{
"questionId": "q29",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6317,
"outputTokens": 135,
"latencyMs": 2665.842083000003
},
{
"questionId": "q29",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6367,
"outputTokens": 4,
"latencyMs": 1081.2904160000035
},
{
"questionId": "q29",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5013,
"outputTokens": 135,
"latencyMs": 2897.919332999998
},
{
"questionId": "q29",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5762,
"outputTokens": 4,
"latencyMs": 1341.0955420000028
},
{
"questionId": "q30",
"format": "json",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 6390,
"outputTokens": 204,
"latencyMs": 3231.9646249999932
},
{
"questionId": "q30",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 12,
"latencyMs": 1288.5363330000037
},
{
"questionId": "q30",
"format": "toon",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 2527,
"outputTokens": 76,
"latencyMs": 2581.508915999999
},
{
"questionId": "q30",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 12,
"latencyMs": 1183.8337079999983
},
{
"questionId": "q30",
"format": "csv",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 2381,
"outputTokens": 140,
"latencyMs": 2073.944792000002
},
{
"questionId": "q30",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 12,
"latencyMs": 1302.5857499999984
},
{
"questionId": "q30",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 6316,
"outputTokens": 204,
"latencyMs": 3076.5304590000014
},
{
"questionId": "q30",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 12,
"latencyMs": 1110.9787920000017
},
{
"questionId": "q30",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 5012,
"outputTokens": 76,
"latencyMs": 3381.732917000001
},
{
"questionId": "q30",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 12,
"latencyMs": 1198.1488329999993
},
{
"questionId": "q31",
"format": "json",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 6393,
"outputTokens": 136,
"latencyMs": 2687.965959000001
},
{
"questionId": "q31",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 7874,
"outputTokens": 6,
"latencyMs": 2615.956250000003
},
{
"questionId": "q31",
"format": "toon",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 2530,
"outputTokens": 136,
"latencyMs": 2132.413249999998
},
{
"questionId": "q31",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 2986,
"outputTokens": 6,
"latencyMs": 1091.060666999998
},
{
"questionId": "q31",
"format": "csv",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 2384,
"outputTokens": 72,
"latencyMs": 2074.8201670000053
},
{
"questionId": "q31",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 2860,
"outputTokens": 6,
"latencyMs": 1622.2757499999934
},
{
"questionId": "q31",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 6319,
"outputTokens": 200,
"latencyMs": 3122.3756670000002
},
{
"questionId": "q31",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 6369,
"outputTokens": 6,
"latencyMs": 1175.7301249999946
},
{
"questionId": "q31",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 5015,
"outputTokens": 136,
"latencyMs": 2601.074916999998
},
{
"questionId": "q31",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 5764,
"outputTokens": 6,
"latencyMs": 1089.4757079999981
},
{
"questionId": "q32",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6389,
"outputTokens": 135,
"latencyMs": 6939.617750000005
},
{
"questionId": "q32",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1207.9619999999995
},
{
"questionId": "q32",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2526,
"outputTokens": 135,
"latencyMs": 2784.063166
},
{
"questionId": "q32",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 1011.0956670000014
},
{
"questionId": "q32",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2380,
"outputTokens": 135,
"latencyMs": 3098.7147909999985
},
{
"questionId": "q32",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 983.9449170000007
},
{
"questionId": "q32",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6315,
"outputTokens": 135,
"latencyMs": 3889.572291999997
},
{
"questionId": "q32",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6364,
"outputTokens": 4,
"latencyMs": 1096.1613339999967
},
{
"questionId": "q32",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5011,
"outputTokens": 71,
"latencyMs": 2484.078917000006
},
{
"questionId": "q32",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1150.418792000004
},
{
"questionId": "q33",
"format": "json",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 6393,
"outputTokens": 140,
"latencyMs": 2221.4447079999954
},
{
"questionId": "q33",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 7872,
"outputTokens": 14,
"latencyMs": 1193.9583749999947
},
{
"questionId": "q33",
"format": "toon",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 2530,
"outputTokens": 76,
"latencyMs": 2170.8865829999995
},
{
"questionId": "q33",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 2984,
"outputTokens": 14,
"latencyMs": 1247.6116660000043
},
{
"questionId": "q33",
"format": "csv",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 2384,
"outputTokens": 76,
"latencyMs": 3827.705667000002
},
{
"questionId": "q33",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 2858,
"outputTokens": 14,
"latencyMs": 1084.8218339999949
},
{
"questionId": "q33",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 6319,
"outputTokens": 140,
"latencyMs": 3311.8220839999994
},
{
"questionId": "q33",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 6367,
"outputTokens": 14,
"latencyMs": 1269.2092920000068
},
{
"questionId": "q33",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 5015,
"outputTokens": 140,
"latencyMs": 2648.3102500000023
},
{
"questionId": "q33",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 5762,
"outputTokens": 14,
"latencyMs": 1278.0403750000041
},
{
"questionId": "q34",
"format": "json",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 6391,
"outputTokens": 136,
"latencyMs": 3555.1511670000036
},
{
"questionId": "q34",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1317.5797499999971
},
{
"questionId": "q34",
"format": "toon",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 2528,
"outputTokens": 136,
"latencyMs": 2291.943041999999
},
{
"questionId": "q34",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 2081.3947499999995
},
{
"questionId": "q34",
"format": "csv",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 2382,
"outputTokens": 72,
"latencyMs": 2067.9348329999993
},
{
"questionId": "q34",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1192.6603340000001
},
{
"questionId": "q34",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 6317,
"outputTokens": 200,
"latencyMs": 3044.592457999999
},
{
"questionId": "q34",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 6366,
"outputTokens": 6,
"latencyMs": 1106.2235409999994
},
{
"questionId": "q34",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 5013,
"outputTokens": 136,
"latencyMs": 2627.8240000000005
},
{
"questionId": "q34",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1379.9015
},
{
"questionId": "q35",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6390,
"outputTokens": 263,
"latencyMs": 3705.3900829999984
},
{
"questionId": "q35",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7871,
"outputTokens": 4,
"latencyMs": 1909.4442500000005
},
{
"questionId": "q35",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 135,
"latencyMs": 2173.6019589999996
},
{
"questionId": "q35",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2983,
"outputTokens": 4,
"latencyMs": 1063.8584580000024
},
{
"questionId": "q35",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 71,
"latencyMs": 1800.4930420000019
},
{
"questionId": "q35",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2857,
"outputTokens": 4,
"latencyMs": 1011.3969579999975
},
{
"questionId": "q35",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6316,
"outputTokens": 135,
"latencyMs": 2562.2492500000008
},
{
"questionId": "q35",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6366,
"outputTokens": 4,
"latencyMs": 1349.1809170000051
},
{
"questionId": "q35",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5012,
"outputTokens": 71,
"latencyMs": 1883.7523750000037
},
{
"questionId": "q35",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5761,
"outputTokens": 4,
"latencyMs": 1135.412292000001
},
{
"questionId": "q36",
"format": "json",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 6389,
"outputTokens": 334,
"latencyMs": 4067.161957999997
},
{
"questionId": "q36",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 14,
"latencyMs": 1333.0713749999995
},
{
"questionId": "q36",
"format": "toon",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 2526,
"outputTokens": 142,
"latencyMs": 2081.8315000000002
},
{
"questionId": "q36",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 14,
"latencyMs": 1231.0224579999995
},
{
"questionId": "q36",
"format": "csv",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 2380,
"outputTokens": 78,
"latencyMs": 2333.0360409999994
},
{
"questionId": "q36",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 14,
"latencyMs": 1175.1937500000058
},
{
"questionId": "q36",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 6315,
"outputTokens": 206,
"latencyMs": 7391.094749999997
},
{
"questionId": "q36",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 14,
"latencyMs": 1843.981458000002
},
{
"questionId": "q36",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 5011,
"outputTokens": 142,
"latencyMs": 2386.8134589999972
},
{
"questionId": "q36",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 14,
"latencyMs": 1449.751750000003
},
{
"questionId": "q37",
"format": "json",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 6389,
"outputTokens": 136,
"latencyMs": 4075.600666999999
},
{
"questionId": "q37",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 7868,
"outputTokens": 6,
"latencyMs": 985.1729999999952
},
{
"questionId": "q37",
"format": "toon",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 2526,
"outputTokens": 136,
"latencyMs": 2891.2602079999997
},
{
"questionId": "q37",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 2980,
"outputTokens": 6,
"latencyMs": 2073.129000000001
},
{
"questionId": "q37",
"format": "csv",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 2380,
"outputTokens": 72,
"latencyMs": 1894.3316669999986
},
{
"questionId": "q37",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 2854,
"outputTokens": 6,
"latencyMs": 1172.3735000000015
},
{
"questionId": "q37",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 6315,
"outputTokens": 72,
"latencyMs": 2456.6511249999967
},
{
"questionId": "q37",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 6363,
"outputTokens": 6,
"latencyMs": 1298.1367079999982
},
{
"questionId": "q37",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 5011,
"outputTokens": 136,
"latencyMs": 6018.304375
},
{
"questionId": "q37",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 5758,
"outputTokens": 6,
"latencyMs": 1103.9152499999982
},
{
"questionId": "q38",
"format": "json",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6389,
"outputTokens": 71,
"latencyMs": 3867.303832999998
},
{
"questionId": "q38",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1287.7528749999983
},
{
"questionId": "q38",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2526,
"outputTokens": 135,
"latencyMs": 2355.0305829999998
},
{
"questionId": "q38",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1086.8424579999992
},
{
"questionId": "q38",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2380,
"outputTokens": 71,
"latencyMs": 3472.6323339999944
},
{
"questionId": "q38",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 948.3086249999978
},
{
"questionId": "q38",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6315,
"outputTokens": 71,
"latencyMs": 3343.3446659999972
},
{
"questionId": "q38",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 1048.567959
},
{
"questionId": "q38",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5011,
"outputTokens": 71,
"latencyMs": 3761.141875000001
},
{
"questionId": "q38",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1130.9393339999951
},
{
"questionId": "q39",
"format": "json",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 6389,
"outputTokens": 79,
"latencyMs": 4200.215792000003
},
{
"questionId": "q39",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 13,
"latencyMs": 1351.981166999998
},
{
"questionId": "q39",
"format": "toon",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 2526,
"outputTokens": 143,
"latencyMs": 2465.4245840000003
},
{
"questionId": "q39",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 13,
"latencyMs": 885.4770840000056
},
{
"questionId": "q39",
"format": "csv",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 2380,
"outputTokens": 143,
"latencyMs": 2903.201958000005
},
{
"questionId": "q39",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 13,
"latencyMs": 1006.1219579999961
},
{
"questionId": "q39",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 6315,
"outputTokens": 207,
"latencyMs": 3253.900333999998
},
{
"questionId": "q39",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 13,
"latencyMs": 1219.713582999997
},
{
"questionId": "q39",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 5011,
"outputTokens": 143,
"latencyMs": 2335.6635000000024
},
{
"questionId": "q39",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 13,
"latencyMs": 1334.1358330000003
},
{
"questionId": "q40",
"format": "json",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 6390,
"outputTokens": 136,
"latencyMs": 1912.2536669999972
},
{
"questionId": "q40",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1104.4684160000033
},
{
"questionId": "q40",
"format": "toon",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 2527,
"outputTokens": 72,
"latencyMs": 2648.919750000001
},
{
"questionId": "q40",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1525.6309170000022
},
{
"questionId": "q40",
"format": "csv",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 2381,
"outputTokens": 136,
"latencyMs": 2736.3283749999973
},
{
"questionId": "q40",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "144426",
"correct": false,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1077.766334
},
{
"questionId": "q40",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 6316,
"outputTokens": 72,
"latencyMs": 2116.5284170000014
},
{
"questionId": "q40",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 6366,
"outputTokens": 6,
"latencyMs": 1159.7744170000005
},
{
"questionId": "q40",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 5012,
"outputTokens": 72,
"latencyMs": 2529.7074160000047
},
{
"questionId": "q40",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1604.601791999994
},
{
"questionId": "q41",
"format": "json",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 6387,
"outputTokens": 967,
"latencyMs": 8300.216583000001
},
{
"questionId": "q41",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1204.089749999992
},
{
"questionId": "q41",
"format": "toon",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 2524,
"outputTokens": 455,
"latencyMs": 5231.604541000001
},
{
"questionId": "q41",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1168.508707999994
},
{
"questionId": "q41",
"format": "csv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 2378,
"outputTokens": 967,
"latencyMs": 8396.912500000006
},
{
"questionId": "q41",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1060.6276250000083
},
{
"questionId": "q41",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 6313,
"outputTokens": 775,
"latencyMs": 9340.763791999998
},
{
"questionId": "q41",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1020.8827080000046
},
{
"questionId": "q41",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 5009,
"outputTokens": 903,
"latencyMs": 8792.062000000005
},
{
"questionId": "q41",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1459.8301659999997
},
{
"questionId": "q42",
"format": "json",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 6387,
"outputTokens": 519,
"latencyMs": 6439.622583000004
},
{
"questionId": "q42",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1416.1659170000057
},
{
"questionId": "q42",
"format": "toon",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 2524,
"outputTokens": 903,
"latencyMs": 8064.398499999996
},
{
"questionId": "q42",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "14",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 998.3781250000029
},
{
"questionId": "q42",
"format": "csv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 2378,
"outputTokens": 647,
"latencyMs": 5498.786500000002
},
{
"questionId": "q42",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1343.9632910000073
},
{
"questionId": "q42",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 6313,
"outputTokens": 647,
"latencyMs": 7565.158291
},
{
"questionId": "q42",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "14",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1320.9714169999934
},
{
"questionId": "q42",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 5009,
"outputTokens": 839,
"latencyMs": 10626.395499999999
},
{
"questionId": "q42",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 3227.584917
},
{
"questionId": "q43",
"format": "json",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 6387,
"outputTokens": 583,
"latencyMs": 6690.373416000002
},
{
"questionId": "q43",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1187.1296250000014
},
{
"questionId": "q43",
"format": "toon",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 2524,
"outputTokens": 519,
"latencyMs": 5081.884875000003
},
{
"questionId": "q43",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1576.2339999999967
},
{
"questionId": "q43",
"format": "csv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 2378,
"outputTokens": 1031,
"latencyMs": 9927.5775
},
{
"questionId": "q43",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1169.6451669999951
},
{
"questionId": "q43",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 6313,
"outputTokens": 519,
"latencyMs": 6772.954291999995
},
{
"questionId": "q43",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1905.9189590000024
},
{
"questionId": "q43",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 5009,
"outputTokens": 455,
"latencyMs": 6827.424666999999
},
{
"questionId": "q43",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 2121.3979160000017
},
{
"questionId": "q44",
"format": "json",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 6387,
"outputTokens": 519,
"latencyMs": 15235.099042000002
},
{
"questionId": "q44",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1182.0669170000037
},
{
"questionId": "q44",
"format": "toon",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 2524,
"outputTokens": 583,
"latencyMs": 6872.47600000001
},
{
"questionId": "q44",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 931.0203749999928
},
{
"questionId": "q44",
"format": "csv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 2378,
"outputTokens": 2311,
"latencyMs": 17952.683875000002
},
{
"questionId": "q44",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1167.8899999999994
},
{
"questionId": "q44",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 6313,
"outputTokens": 455,
"latencyMs": 6896.831916999989
},
{
"questionId": "q44",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "10",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1401.859083000003
},
{
"questionId": "q44",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "17",
"actual": "17",
"correct": true,
"inputTokens": 5009,
"outputTokens": 647,
"latencyMs": 5266.956917000003
},
{
"questionId": "q44",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1100.9057919999905
},
{
"questionId": "q45",
"format": "json",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 6387,
"outputTokens": 1095,
"latencyMs": 15621.264291999993
},
{
"questionId": "q45",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1063.5868750000081
},
{
"questionId": "q45",
"format": "toon",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 2524,
"outputTokens": 455,
"latencyMs": 5703.061916000006
},
{
"questionId": "q45",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1113.9432499999966
},
{
"questionId": "q45",
"format": "csv",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 2378,
"outputTokens": 3015,
"latencyMs": 22321.357124999995
},
{
"questionId": "q45",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 968.0936249999941
},
{
"questionId": "q45",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 6313,
"outputTokens": 1287,
"latencyMs": 14521.080749999994
},
{
"questionId": "q45",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1228.1847500000003
},
{
"questionId": "q45",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 5009,
"outputTokens": 455,
"latencyMs": 5216.268042000011
},
{
"questionId": "q45",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1026.5127079999947
},
{
"questionId": "q46",
"format": "json",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 6387,
"outputTokens": 391,
"latencyMs": 4335.125541000001
},
{
"questionId": "q46",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "10",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1116.4177909999999
},
{
"questionId": "q46",
"format": "toon",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 2524,
"outputTokens": 583,
"latencyMs": 4128.823499999999
},
{
"questionId": "q46",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1105.622457999998
},
{
"questionId": "q46",
"format": "csv",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 2378,
"outputTokens": 839,
"latencyMs": 6542.58583299999
},
{
"questionId": "q46",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1084.2237909999967
},
{
"questionId": "q46",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 6313,
"outputTokens": 455,
"latencyMs": 5050.133375000005
},
{
"questionId": "q46",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "10",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1075.023709000001
},
{
"questionId": "q46",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "16",
"actual": "16",
"correct": true,
"inputTokens": 5009,
"outputTokens": 711,
"latencyMs": 9237.985791
},
{
"questionId": "q46",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1346.3510000000097
},
{
"questionId": "q47",
"format": "json",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"correct": true,
"inputTokens": 6392,
"outputTokens": 2375,
"latencyMs": 27655.89520900001
},
{
"questionId": "q47",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"correct": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1315.7111659999937
},
{
"questionId": "q47",
"format": "toon",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"correct": true,
"inputTokens": 2529,
"outputTokens": 2695,
"latencyMs": 26482.504707999993
},
{
"questionId": "q47",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "85",
"correct": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1368.221916999988
},
{
"questionId": "q47",
"format": "csv",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"correct": true,
"inputTokens": 2383,
"outputTokens": 1671,
"latencyMs": 18249.434333000012
},
{
"questionId": "q47",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "85",
"correct": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 1051.9521660000028
},
{
"questionId": "q47",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"correct": true,
"inputTokens": 6318,
"outputTokens": 1799,
"latencyMs": 15867.284083999999
},
{
"questionId": "q47",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"correct": false,
"inputTokens": 6365,
"outputTokens": 5,
"latencyMs": 1831.3835839999956
},
{
"questionId": "q47",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "91",
"actual": "91",
"correct": true,
"inputTokens": 5014,
"outputTokens": 2247,
"latencyMs": 19254.821666999997
},
{
"questionId": "q47",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"correct": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1762.2908329999918
},
{
"questionId": "q48",
"format": "json",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"correct": true,
"inputTokens": 6392,
"outputTokens": 1479,
"latencyMs": 13444.104542000001
},
{
"questionId": "q48",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "57",
"correct": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1182.2523340000043
},
{
"questionId": "q48",
"format": "toon",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"correct": true,
"inputTokens": 2529,
"outputTokens": 2183,
"latencyMs": 19257.86050000001
},
{
"questionId": "q48",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "47",
"correct": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1081.3142080000107
},
{
"questionId": "q48",
"format": "csv",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"correct": true,
"inputTokens": 2383,
"outputTokens": 3463,
"latencyMs": 21384.707542000004
},
{
"questionId": "q48",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "47",
"correct": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 1051.6647080000112
},
{
"questionId": "q48",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "67",
"actual": "67",
"correct": true,
"inputTokens": 6318,
"outputTokens": 2439,
"latencyMs": 19519.416207999995
},
{
"questionId": "q48",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "47",
"correct": false,
"inputTokens": 6365,
"outputTokens": 5,
"latencyMs": 1060.1008749999892
},
{
"questionId": "q48",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "67",
"actual": "66",
"correct": false,
"inputTokens": 5014,
"outputTokens": 1991,
"latencyMs": 15234.403459000008
},
{
"questionId": "q48",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "57",
"correct": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1208.8559589999932
},
{
"questionId": "q49",
"format": "json",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 6392,
"outputTokens": 1415,
"latencyMs": 14119.885540999996
},
{
"questionId": "q49",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1428.8373750000028
},
{
"questionId": "q49",
"format": "toon",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 2529,
"outputTokens": 1607,
"latencyMs": 13997.297709000006
},
{
"questionId": "q49",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "27",
"correct": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1270.4412920000032
},
{
"questionId": "q49",
"format": "csv",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 2383,
"outputTokens": 1415,
"latencyMs": 13861.177167000002
},
{
"questionId": "q49",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 916.5238340000069
},
{
"questionId": "q49",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "41",
"actual": "42",
"correct": false,
"inputTokens": 6318,
"outputTokens": 1799,
"latencyMs": 16007.06925
},
{
"questionId": "q49",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "27",
"correct": false,
"inputTokens": 6365,
"outputTokens": 5,
"latencyMs": 1426.0594579999888
},
{
"questionId": "q49",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 5014,
"outputTokens": 2055,
"latencyMs": 22966.680624999994
},
{
"questionId": "q49",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1044.6609999999928
},
{
"questionId": "q50",
"format": "json",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"correct": true,
"inputTokens": 6392,
"outputTokens": 1159,
"latencyMs": 10799.117333000002
},
{
"questionId": "q50",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "20",
"correct": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1359.5568330000096
},
{
"questionId": "q50",
"format": "toon",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"correct": true,
"inputTokens": 2529,
"outputTokens": 1543,
"latencyMs": 13702.052542000005
},
{
"questionId": "q50",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "16",
"correct": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 967.0454159999936
},
{
"questionId": "q50",
"format": "csv",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"correct": true,
"inputTokens": 2383,
"outputTokens": 1671,
"latencyMs": 13116.871958000003
},
{
"questionId": "q50",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "16",
"correct": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 1088.8372910000035
},
{
"questionId": "q50",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"correct": true,
"inputTokens": 6318,
"outputTokens": 1543,
"latencyMs": 14387.148624999987
},
{
"questionId": "q50",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "16",
"correct": false,
"inputTokens": 6365,
"outputTokens": 5,
"latencyMs": 1273.9564170000085
},
{
"questionId": "q50",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "26",
"actual": "26",
"correct": true,
"inputTokens": 5014,
"outputTokens": 1223,
"latencyMs": 12143.083792000005
},
{
"questionId": "q50",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "20",
"correct": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1032.9807079999882
},
{
"questionId": "q51",
"format": "json",
"model": "gpt-5-nano",
"expected": "78",
"actual": "78",
"correct": true,
"inputTokens": 6386,
"outputTokens": 2631,
"latencyMs": 23077.678417000003
},
{
"questionId": "q51",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "81",
"correct": false,
"inputTokens": 7864,
"outputTokens": 5,
"latencyMs": 1281.171417000005
},
{
"questionId": "q51",
"format": "toon",
"model": "gpt-5-nano",
"expected": "78",
"actual": "78",
"correct": true,
"inputTokens": 2523,
"outputTokens": 2759,
"latencyMs": 20331.962667
},
{
"questionId": "q51",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "78",
"correct": true,
"inputTokens": 2976,
"outputTokens": 5,
"latencyMs": 1014.3847079999978
},
{
"questionId": "q51",
"format": "csv",
"model": "gpt-5-nano",
"expected": "78",
"actual": "81",
"correct": false,
"inputTokens": 2377,
"outputTokens": 3335,
"latencyMs": 18037.630208000002
},
{
"questionId": "q51",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "73",
"correct": false,
"inputTokens": 2850,
"outputTokens": 5,
"latencyMs": 918.3078749999986
},
{
"questionId": "q51",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "78",
"actual": "78",
"correct": true,
"inputTokens": 6312,
"outputTokens": 1991,
"latencyMs": 15660.232958000008
},
{
"questionId": "q51",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "78",
"correct": true,
"inputTokens": 6359,
"outputTokens": 5,
"latencyMs": 1033.7647080000024
},
{
"questionId": "q51",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "78",
"actual": "78",
"correct": true,
"inputTokens": 5008,
"outputTokens": 4295,
"latencyMs": 26817.97
},
{
"questionId": "q51",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "77",
"correct": false,
"inputTokens": 5754,
"outputTokens": 5,
"latencyMs": 1348.084750000009
},
{
"questionId": "q52",
"format": "json",
"model": "gpt-5-nano",
"expected": "22",
"actual": "22",
"correct": true,
"inputTokens": 6386,
"outputTokens": 1223,
"latencyMs": 10273.866540999996
},
{
"questionId": "q52",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "15",
"correct": false,
"inputTokens": 7864,
"outputTokens": 5,
"latencyMs": 1081.604707999999
},
{
"questionId": "q52",
"format": "toon",
"model": "gpt-5-nano",
"expected": "22",
"actual": "22",
"correct": true,
"inputTokens": 2523,
"outputTokens": 903,
"latencyMs": 13862.020499999999
},
{
"questionId": "q52",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "16",
"correct": false,
"inputTokens": 2976,
"outputTokens": 5,
"latencyMs": 965.817916
},
{
"questionId": "q52",
"format": "csv",
"model": "gpt-5-nano",
"expected": "22",
"actual": "21",
"correct": false,
"inputTokens": 2377,
"outputTokens": 2631,
"latencyMs": 24254.82570799999
},
{
"questionId": "q52",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "20",
"correct": false,
"inputTokens": 2850,
"outputTokens": 5,
"latencyMs": 998.7978339999972
},
{
"questionId": "q52",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "22",
"actual": "22",
"correct": true,
"inputTokens": 6312,
"outputTokens": 1095,
"latencyMs": 10401.351500000004
},
{
"questionId": "q52",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "15",
"correct": false,
"inputTokens": 6359,
"outputTokens": 5,
"latencyMs": 1479.388791999998
},
{
"questionId": "q52",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "22",
"actual": "22",
"correct": true,
"inputTokens": 5008,
"outputTokens": 839,
"latencyMs": 8160.454833999989
},
{
"questionId": "q52",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "16",
"correct": false,
"inputTokens": 5754,
"outputTokens": 5,
"latencyMs": 1763.230291999993
},
{
"questionId": "q53",
"format": "json",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"correct": true,
"inputTokens": 6394,
"outputTokens": 1671,
"latencyMs": 14807.253333
},
{
"questionId": "q53",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "9",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1185.018333
},
{
"questionId": "q53",
"format": "toon",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"correct": true,
"inputTokens": 2531,
"outputTokens": 1607,
"latencyMs": 13592.477832999997
},
{
"questionId": "q53",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "9",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 947.2789590000029
},
{
"questionId": "q53",
"format": "csv",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"correct": true,
"inputTokens": 2385,
"outputTokens": 2759,
"latencyMs": 22718.536041999992
},
{
"questionId": "q53",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "10",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 973.4814580000093
},
{
"questionId": "q53",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"correct": true,
"inputTokens": 6320,
"outputTokens": 1031,
"latencyMs": 10025.186000000002
},
{
"questionId": "q53",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "8",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1038.4732499999955
},
{
"questionId": "q53",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"correct": true,
"inputTokens": 5016,
"outputTokens": 903,
"latencyMs": 12459.619915999996
},
{
"questionId": "q53",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "10",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1448.7940839999937
},
{
"questionId": "q54",
"format": "json",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 6394,
"outputTokens": 1415,
"latencyMs": 13094.547666999992
},
{
"questionId": "q54",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "7",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1241.7239169999957
},
{
"questionId": "q54",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 2531,
"outputTokens": 1031,
"latencyMs": 10610.864084
},
{
"questionId": "q54",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "6",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1100.7670829999988
},
{
"questionId": "q54",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 2385,
"outputTokens": 1095,
"latencyMs": 11523.293417000008
},
{
"questionId": "q54",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 980.1522499999992
},
{
"questionId": "q54",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 6320,
"outputTokens": 1095,
"latencyMs": 8184.143375
},
{
"questionId": "q54",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "6",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1175.0723330000037
},
{
"questionId": "q54",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 5016,
"outputTokens": 1159,
"latencyMs": 13082.53912500001
},
{
"questionId": "q54",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1020.4026659999945
},
{
"questionId": "q55",
"format": "json",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 6394,
"outputTokens": 1223,
"latencyMs": 13166.679334
},
{
"questionId": "q55",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1090.0060839999933
},
{
"questionId": "q55",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 2531,
"outputTokens": 1287,
"latencyMs": 11181.234958000001
},
{
"questionId": "q55",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "7",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1365.1262080000015
},
{
"questionId": "q55",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 2385,
"outputTokens": 967,
"latencyMs": 9549.427916999994
},
{
"questionId": "q55",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 981.8662500000064
},
{
"questionId": "q55",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 6320,
"outputTokens": 1223,
"latencyMs": 11591.030333000002
},
{
"questionId": "q55",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "7",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1430.038750000007
},
{
"questionId": "q55",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "10",
"correct": false,
"inputTokens": 5016,
"outputTokens": 1735,
"latencyMs": 11458.303500000009
},
{
"questionId": "q55",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "9",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1103.2402909999946
},
{
"questionId": "q56",
"format": "json",
"model": "gpt-5-nano",
"expected": "12",
"actual": "11",
"correct": false,
"inputTokens": 6394,
"outputTokens": 2631,
"latencyMs": 16900.63120799999
},
{
"questionId": "q56",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "7",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1043.442332999999
},
{
"questionId": "q56",
"format": "toon",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"correct": true,
"inputTokens": 2531,
"outputTokens": 839,
"latencyMs": 7278.612083
},
{
"questionId": "q56",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "6",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1705.2114999999903
},
{
"questionId": "q56",
"format": "csv",
"model": "gpt-5-nano",
"expected": "12",
"actual": "11",
"correct": false,
"inputTokens": 2385,
"outputTokens": 1415,
"latencyMs": 10625.603375000006
},
{
"questionId": "q56",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "7",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1081.0501670000085
},
{
"questionId": "q56",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "12",
"actual": "12",
"correct": true,
"inputTokens": 6320,
"outputTokens": 2055,
"latencyMs": 17548.71483299999
},
{
"questionId": "q56",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "7",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 2302.2003750000003
},
{
"questionId": "q56",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "12",
"actual": "11",
"correct": false,
"inputTokens": 5016,
"outputTokens": 1287,
"latencyMs": 13187.201000000015
},
{
"questionId": "q56",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "8",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 2621.4970829999947
},
{
"questionId": "q57",
"format": "json",
"model": "gpt-5-nano",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 6393,
"outputTokens": 3783,
"latencyMs": 29393.69395799999
},
{
"questionId": "q57",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1402.049291999996
},
{
"questionId": "q57",
"format": "toon",
"model": "gpt-5-nano",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 2530,
"outputTokens": 2823,
"latencyMs": 23696.75
},
{
"questionId": "q57",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1064.7778749999998
},
{
"questionId": "q57",
"format": "csv",
"model": "gpt-5-nano",
"expected": "62",
"actual": "64",
"correct": false,
"inputTokens": 2384,
"outputTokens": 3143,
"latencyMs": 28384.533249999993
},
{
"questionId": "q57",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 889.2725839999912
},
{
"questionId": "q57",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 6319,
"outputTokens": 6663,
"latencyMs": 50113.09675
},
{
"questionId": "q57",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1074.8158330000006
},
{
"questionId": "q57",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 5015,
"outputTokens": 2631,
"latencyMs": 23841.036083999992
},
{
"questionId": "q57",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1010.4629169999971
},
{
"questionId": "q58",
"format": "json",
"model": "gpt-5-nano",
"expected": "45",
"actual": "45",
"correct": true,
"inputTokens": 6393,
"outputTokens": 2247,
"latencyMs": 18818.030874999997
},
{
"questionId": "q58",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1203.152833
},
{
"questionId": "q58",
"format": "toon",
"model": "gpt-5-nano",
"expected": "45",
"actual": "45",
"correct": true,
"inputTokens": 2530,
"outputTokens": 2631,
"latencyMs": 21987.539915999994
},
{
"questionId": "q58",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1000.0181669999874
},
{
"questionId": "q58",
"format": "csv",
"model": "gpt-5-nano",
"expected": "45",
"actual": "46",
"correct": false,
"inputTokens": 2384,
"outputTokens": 3079,
"latencyMs": 24534.847250000006
},
{
"questionId": "q58",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1125.7029999999795
},
{
"questionId": "q58",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "45",
"actual": "45",
"correct": true,
"inputTokens": 6319,
"outputTokens": 2823,
"latencyMs": 27053.90824999998
},
{
"questionId": "q58",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1474.1193330000096
},
{
"questionId": "q58",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "45",
"actual": "45",
"correct": true,
"inputTokens": 5015,
"outputTokens": 2567,
"latencyMs": 21642.824207999976
},
{
"questionId": "q58",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "38",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1170.1535830000066
},
{
"questionId": "q59",
"format": "json",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 9739,
"outputTokens": 73,
"latencyMs": 2340.6126670000085
},
{
"questionId": "q59",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1337.4746670000022
},
{
"questionId": "q59",
"format": "toon",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 6013,
"outputTokens": 137,
"latencyMs": 2275.1715830000176
},
{
"questionId": "q59",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1086.9557499999937
},
{
"questionId": "q59",
"format": "csv",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 6781,
"outputTokens": 137,
"latencyMs": 2881.4037499999977
},
{
"questionId": "q59",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1172.774000000005
},
{
"questionId": "q59",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 9158,
"outputTokens": 201,
"latencyMs": 7706.478582999989
},
{
"questionId": "q59",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1106.0717920000025
},
{
"questionId": "q59",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 7373,
"outputTokens": 137,
"latencyMs": 6185.161250000005
},
{
"questionId": "q59",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1388.4410000000207
},
{
"questionId": "q60",
"format": "json",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9738,
"outputTokens": 136,
"latencyMs": 6699.9394589999865
},
{
"questionId": "q60",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1152.8117919999931
},
{
"questionId": "q60",
"format": "toon",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6012,
"outputTokens": 136,
"latencyMs": 2446.019666999986
},
{
"questionId": "q60",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1046.3494580000115
},
{
"questionId": "q60",
"format": "csv",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6780,
"outputTokens": 200,
"latencyMs": 6084.429165999987
},
{
"questionId": "q60",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1787.2428749999963
},
{
"questionId": "q60",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9157,
"outputTokens": 264,
"latencyMs": 5364.3007919999945
},
{
"questionId": "q60",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1269.2162499999977
},
{
"questionId": "q60",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 7372,
"outputTokens": 72,
"latencyMs": 2381.514374999999
},
{
"questionId": "q60",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1222.1361669999897
},
{
"questionId": "q61",
"format": "json",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 9739,
"outputTokens": 201,
"latencyMs": 3641.536167000013
},
{
"questionId": "q61",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 2457.5752079999947
},
{
"questionId": "q61",
"format": "toon",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 6013,
"outputTokens": 201,
"latencyMs": 3384.6115839999984
},
{
"questionId": "q61",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1372.8756669999857
},
{
"questionId": "q61",
"format": "csv",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 6781,
"outputTokens": 265,
"latencyMs": 5826.962750000006
},
{
"questionId": "q61",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1303.1691670000146
},
{
"questionId": "q61",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 9158,
"outputTokens": 265,
"latencyMs": 3602.1091250000172
},
{
"questionId": "q61",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1451.1585410000116
},
{
"questionId": "q61",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 7373,
"outputTokens": 137,
"latencyMs": 2453.183083000011
},
{
"questionId": "q61",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1152.136541999993
},
{
"questionId": "q62",
"format": "json",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9738,
"outputTokens": 199,
"latencyMs": 5025.56916699998
},
{
"questionId": "q62",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1111.5014169999922
},
{
"questionId": "q62",
"format": "toon",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6012,
"outputTokens": 199,
"latencyMs": 3548.9061660000007
},
{
"questionId": "q62",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1404.0692500000005
},
{
"questionId": "q62",
"format": "csv",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6780,
"outputTokens": 135,
"latencyMs": 2879.9619169999787
},
{
"questionId": "q62",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1258.860249999998
},
{
"questionId": "q62",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9157,
"outputTokens": 263,
"latencyMs": 7819.738958000002
},
{
"questionId": "q62",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1495.973915999988
},
{
"questionId": "q62",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 7372,
"outputTokens": 135,
"latencyMs": 3092.4329169999983
},
{
"questionId": "q62",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1268.1641250000102
},
{
"questionId": "q63",
"format": "json",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 9739,
"outputTokens": 265,
"latencyMs": 4409.96212500002
},
{
"questionId": "q63",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1422.6079999999783
},
{
"questionId": "q63",
"format": "toon",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 6013,
"outputTokens": 329,
"latencyMs": 3593.100334000017
},
{
"questionId": "q63",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1474.3911249999946
},
{
"questionId": "q63",
"format": "csv",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 6781,
"outputTokens": 265,
"latencyMs": 5419.795374999987
},
{
"questionId": "q63",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1059.3489999999874
},
{
"questionId": "q63",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 9158,
"outputTokens": 265,
"latencyMs": 4783.504167000006
},
{
"questionId": "q63",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1340.6675410000025
},
{
"questionId": "q63",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 7373,
"outputTokens": 329,
"latencyMs": 4222.140958000004
},
{
"questionId": "q63",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1169.892125000013
},
{
"questionId": "q64",
"format": "json",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9738,
"outputTokens": 135,
"latencyMs": 2854.8382500000007
},
{
"questionId": "q64",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1077.335374999995
},
{
"questionId": "q64",
"format": "toon",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6012,
"outputTokens": 135,
"latencyMs": 2525.2092499999853
},
{
"questionId": "q64",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 2100.2050000000163
},
{
"questionId": "q64",
"format": "csv",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6780,
"outputTokens": 263,
"latencyMs": 5882.592499999999
},
{
"questionId": "q64",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1168.5295410000253
},
{
"questionId": "q64",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9157,
"outputTokens": 263,
"latencyMs": 3944.433083000011
},
{
"questionId": "q64",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1882.1263749999925
},
{
"questionId": "q64",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 7372,
"outputTokens": 135,
"latencyMs": 1657.7255829999922
},
{
"questionId": "q64",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1056.5719169999938
},
{
"questionId": "q65",
"format": "json",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 9739,
"outputTokens": 266,
"latencyMs": 5764.2531250000175
},
{
"questionId": "q65",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1241.8239590000012
},
{
"questionId": "q65",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 6013,
"outputTokens": 266,
"latencyMs": 3203.148416000011
},
{
"questionId": "q65",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1395.2265419999894
},
{
"questionId": "q65",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 6781,
"outputTokens": 330,
"latencyMs": 3854.1738750000077
},
{
"questionId": "q65",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1868.680457999988
},
{
"questionId": "q65",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 9158,
"outputTokens": 330,
"latencyMs": 4486.571708000003
},
{
"questionId": "q65",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 9289,
"outputTokens": 8,
"latencyMs": 1336.9320829999924
},
{
"questionId": "q65",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 7373,
"outputTokens": 266,
"latencyMs": 3571.6664579999924
},
{
"questionId": "q65",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1179.5032920000085
},
{
"questionId": "q66",
"format": "json",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9738,
"outputTokens": 200,
"latencyMs": 3395.709499999997
},
{
"questionId": "q66",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1374.4573329999985
},
{
"questionId": "q66",
"format": "toon",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6012,
"outputTokens": 200,
"latencyMs": 3162.779542000004
},
{
"questionId": "q66",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1010.6076670000039
},
{
"questionId": "q66",
"format": "csv",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6780,
"outputTokens": 328,
"latencyMs": 3606.7964999999967
},
{
"questionId": "q66",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1432.5227920000034
},
{
"questionId": "q66",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9157,
"outputTokens": 328,
"latencyMs": 2916.351958000014
},
{
"questionId": "q66",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1207.7237920000043
},
{
"questionId": "q66",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 7372,
"outputTokens": 136,
"latencyMs": 2741.256458000018
},
{
"questionId": "q66",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1385.7817920000234
},
{
"questionId": "q67",
"format": "json",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 9739,
"outputTokens": 201,
"latencyMs": 4731.81024999998
},
{
"questionId": "q67",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1572.4971659999865
},
{
"questionId": "q67",
"format": "toon",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 6013,
"outputTokens": 137,
"latencyMs": 2684.556333000015
},
{
"questionId": "q67",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1314.9989999999816
},
{
"questionId": "q67",
"format": "csv",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 6781,
"outputTokens": 137,
"latencyMs": 2746.457541999989
},
{
"questionId": "q67",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1254.8903329999885
},
{
"questionId": "q67",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 9158,
"outputTokens": 137,
"latencyMs": 4298.293416
},
{
"questionId": "q67",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1346.4980839999916
},
{
"questionId": "q67",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 7373,
"outputTokens": 265,
"latencyMs": 3634.2565419999883
},
{
"questionId": "q67",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1363.8280410000007
},
{
"questionId": "q68",
"format": "json",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9738,
"outputTokens": 392,
"latencyMs": 3933.217000000004
},
{
"questionId": "q68",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1229.9339579999796
},
{
"questionId": "q68",
"format": "toon",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6012,
"outputTokens": 136,
"latencyMs": 2728.4598340000084
},
{
"questionId": "q68",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1427.2494170000136
},
{
"questionId": "q68",
"format": "csv",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6780,
"outputTokens": 200,
"latencyMs": 3187.385666999995
},
{
"questionId": "q68",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1482.2487079999992
},
{
"questionId": "q68",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9157,
"outputTokens": 264,
"latencyMs": 3429.744458000001
},
{
"questionId": "q68",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1100.8814589999965
},
{
"questionId": "q68",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 7372,
"outputTokens": 72,
"latencyMs": 1993.443707999977
},
{
"questionId": "q68",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1105.5260419999831
},
{
"questionId": "q69",
"format": "json",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 9739,
"outputTokens": 137,
"latencyMs": 3255.3775840000017
},
{
"questionId": "q69",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1274.000417000003
},
{
"questionId": "q69",
"format": "toon",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 6013,
"outputTokens": 265,
"latencyMs": 3098.326624999987
},
{
"questionId": "q69",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1057.8637079999899
},
{
"questionId": "q69",
"format": "csv",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 6781,
"outputTokens": 201,
"latencyMs": 3651.3826249999984
},
{
"questionId": "q69",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1404.9795829999784
},
{
"questionId": "q69",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 9158,
"outputTokens": 201,
"latencyMs": 4157.148833000014
},
{
"questionId": "q69",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1607.9431249999907
},
{
"questionId": "q69",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 7373,
"outputTokens": 329,
"latencyMs": 4582.246665999992
},
{
"questionId": "q69",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1458.8513329999987
},
{
"questionId": "q70",
"format": "json",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9738,
"outputTokens": 200,
"latencyMs": 3341.994207999989
},
{
"questionId": "q70",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1144.3136670000094
},
{
"questionId": "q70",
"format": "toon",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6012,
"outputTokens": 392,
"latencyMs": 6067.672458999994
},
{
"questionId": "q70",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1325.0467500000086
},
{
"questionId": "q70",
"format": "csv",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6780,
"outputTokens": 200,
"latencyMs": 2847.485000000015
},
{
"questionId": "q70",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1212.1944169999915
},
{
"questionId": "q70",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9157,
"outputTokens": 456,
"latencyMs": 5099.853499999997
},
{
"questionId": "q70",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1284.708416999987
},
{
"questionId": "q70",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 7372,
"outputTokens": 200,
"latencyMs": 2745.7869170000195
},
{
"questionId": "q70",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1114.6338329999999
},
{
"questionId": "q71",
"format": "json",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 9739,
"outputTokens": 265,
"latencyMs": 3482.8154170000053
},
{
"questionId": "q71",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1156.5491669999901
},
{
"questionId": "q71",
"format": "toon",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 6013,
"outputTokens": 201,
"latencyMs": 2970.104541000008
},
{
"questionId": "q71",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1297.768374999985
},
{
"questionId": "q71",
"format": "csv",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 6781,
"outputTokens": 201,
"latencyMs": 3475.6895419999782
},
{
"questionId": "q71",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1469.7436250000028
},
{
"questionId": "q71",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 9158,
"outputTokens": 265,
"latencyMs": 4107.424582999985
},
{
"questionId": "q71",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1070.4507500000182
},
{
"questionId": "q71",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 7373,
"outputTokens": 265,
"latencyMs": 3768.3023749999993
},
{
"questionId": "q71",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1111.744915999996
},
{
"questionId": "q72",
"format": "json",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9738,
"outputTokens": 263,
"latencyMs": 3199.3634999999776
},
{
"questionId": "q72",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1232.4811659999832
},
{
"questionId": "q72",
"format": "toon",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6012,
"outputTokens": 263,
"latencyMs": 5616.989999999991
},
{
"questionId": "q72",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1697.3162920000032
},
{
"questionId": "q72",
"format": "csv",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6780,
"outputTokens": 199,
"latencyMs": 2781.3399999999965
},
{
"questionId": "q72",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1162.0402089999989
},
{
"questionId": "q72",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9157,
"outputTokens": 199,
"latencyMs": 3651.1349579999805
},
{
"questionId": "q72",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1132.3132920000062
},
{
"questionId": "q72",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 7372,
"outputTokens": 135,
"latencyMs": 3017.5073749999865
},
{
"questionId": "q72",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1294.688374999998
},
{
"questionId": "q73",
"format": "json",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 9739,
"outputTokens": 201,
"latencyMs": 3591.221499999985
},
{
"questionId": "q73",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1329.419332999998
},
{
"questionId": "q73",
"format": "toon",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 6013,
"outputTokens": 137,
"latencyMs": 2655.557792000007
},
{
"questionId": "q73",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1446.9020000000019
},
{
"questionId": "q73",
"format": "csv",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 6781,
"outputTokens": 201,
"latencyMs": 3450.5822500000068
},
{
"questionId": "q73",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1291.2180410000146
},
{
"questionId": "q73",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 9158,
"outputTokens": 201,
"latencyMs": 2803.9767500000016
},
{
"questionId": "q73",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1098.5968749999884
},
{
"questionId": "q73",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 7373,
"outputTokens": 201,
"latencyMs": 3047.8699999999953
},
{
"questionId": "q73",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1800.6882080000069
},
{
"questionId": "q74",
"format": "json",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9738,
"outputTokens": 199,
"latencyMs": 2957.2203330000048
},
{
"questionId": "q74",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1165.7748750000028
},
{
"questionId": "q74",
"format": "toon",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6012,
"outputTokens": 135,
"latencyMs": 2362.283208000008
},
{
"questionId": "q74",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1871.7275829999999
},
{
"questionId": "q74",
"format": "csv",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6780,
"outputTokens": 263,
"latencyMs": 4747.243208
},
{
"questionId": "q74",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1275.342082999996
},
{
"questionId": "q74",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9157,
"outputTokens": 199,
"latencyMs": 3180.0179160000116
},
{
"questionId": "q74",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 2343.5514580000017
},
{
"questionId": "q74",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 7372,
"outputTokens": 135,
"latencyMs": 2362.525915999984
},
{
"questionId": "q74",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1231.4291669999948
},
{
"questionId": "q75",
"format": "json",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 9739,
"outputTokens": 200,
"latencyMs": 3091.9045840000035
},
{
"questionId": "q75",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 11907,
"outputTokens": 6,
"latencyMs": 1111.9695000000065
},
{
"questionId": "q75",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 6013,
"outputTokens": 264,
"latencyMs": 3977.5146669999813
},
{
"questionId": "q75",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 6993,
"outputTokens": 6,
"latencyMs": 1195.262208
},
{
"questionId": "q75",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 6781,
"outputTokens": 328,
"latencyMs": 3839.0627499999828
},
{
"questionId": "q75",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 8414,
"outputTokens": 6,
"latencyMs": 2186.8021250000165
},
{
"questionId": "q75",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 9158,
"outputTokens": 328,
"latencyMs": 6945.004667000001
},
{
"questionId": "q75",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 9289,
"outputTokens": 6,
"latencyMs": 1103.6762919999892
},
{
"questionId": "q75",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 7373,
"outputTokens": 264,
"latencyMs": 3924.5181250000023
},
{
"questionId": "q75",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 8385,
"outputTokens": 6,
"latencyMs": 1023.334583000018
},
{
"questionId": "q76",
"format": "json",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9738,
"outputTokens": 264,
"latencyMs": 4017.931666999997
},
{
"questionId": "q76",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1278.6839580000087
},
{
"questionId": "q76",
"format": "toon",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6012,
"outputTokens": 200,
"latencyMs": 2566.9374580000003
},
{
"questionId": "q76",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 958.4104159999988
},
{
"questionId": "q76",
"format": "csv",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6780,
"outputTokens": 264,
"latencyMs": 3640.0960409999825
},
{
"questionId": "q76",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1534.7306249999965
},
{
"questionId": "q76",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9157,
"outputTokens": 328,
"latencyMs": 3905.6711249999935
},
{
"questionId": "q76",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 2067.435375000001
},
{
"questionId": "q76",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 7372,
"outputTokens": 264,
"latencyMs": 3613.7146249999932
},
{
"questionId": "q76",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1154.955958000006
},
{
"questionId": "q77",
"format": "json",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 9739,
"outputTokens": 330,
"latencyMs": 3904.2146250000224
},
{
"questionId": "q77",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1618.7487079999992
},
{
"questionId": "q77",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 6013,
"outputTokens": 202,
"latencyMs": 2906.194541999983
},
{
"questionId": "q77",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1481.559333000012
},
{
"questionId": "q77",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 6781,
"outputTokens": 266,
"latencyMs": 3879.7539999999863
},
{
"questionId": "q77",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1809.5822499999776
},
{
"questionId": "q77",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 9158,
"outputTokens": 202,
"latencyMs": 3147.330500000011
},
{
"questionId": "q77",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 9289,
"outputTokens": 8,
"latencyMs": 1297.2377080000006
},
{
"questionId": "q77",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 7373,
"outputTokens": 394,
"latencyMs": 3710.157500000001
},
{
"questionId": "q77",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1238.5442500000063
},
{
"questionId": "q78",
"format": "json",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9738,
"outputTokens": 392,
"latencyMs": 4101.743083999987
},
{
"questionId": "q78",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1170.750417000003
},
{
"questionId": "q78",
"format": "toon",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6012,
"outputTokens": 264,
"latencyMs": 8324.009665999998
},
{
"questionId": "q78",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1173.343790999992
},
{
"questionId": "q78",
"format": "csv",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6780,
"outputTokens": 264,
"latencyMs": 3005.4394999999786
},
{
"questionId": "q78",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1376.5506659999955
},
{
"questionId": "q78",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9157,
"outputTokens": 136,
"latencyMs": 3209.5317499999946
},
{
"questionId": "q78",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1299.4064170000202
},
{
"questionId": "q78",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 7372,
"outputTokens": 264,
"latencyMs": 3753.726042000024
},
{
"questionId": "q78",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1134.558416999993
},
{
"questionId": "q79",
"format": "json",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 9739,
"outputTokens": 73,
"latencyMs": 2494.451874999999
},
{
"questionId": "q79",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1270.5290410000016
},
{
"questionId": "q79",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 6013,
"outputTokens": 137,
"latencyMs": 2403.4134579999954
},
{
"questionId": "q79",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1673.0169579999929
},
{
"questionId": "q79",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 6781,
"outputTokens": 73,
"latencyMs": 1704.8420409999962
},
{
"questionId": "q79",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1447.5210840000072
},
{
"questionId": "q79",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 9158,
"outputTokens": 73,
"latencyMs": 1638.756207999977
},
{
"questionId": "q79",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 1504.7892920000013
},
{
"questionId": "q79",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 7373,
"outputTokens": 137,
"latencyMs": 2409.509625000006
},
{
"questionId": "q79",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1318.699833999999
},
{
"questionId": "q80",
"format": "json",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 9739,
"outputTokens": 138,
"latencyMs": 2616.233749999985
},
{
"questionId": "q80",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1314.3836249999877
},
{
"questionId": "q80",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 6013,
"outputTokens": 138,
"latencyMs": 2722.7087499999907
},
{
"questionId": "q80",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1190.632500000007
},
{
"questionId": "q80",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 6781,
"outputTokens": 330,
"latencyMs": 4346.388291999989
},
{
"questionId": "q80",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1327.8158750000002
},
{
"questionId": "q80",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 9158,
"outputTokens": 74,
"latencyMs": 2443.0598340000142
},
{
"questionId": "q80",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 1396.4260829999985
},
{
"questionId": "q80",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 7373,
"outputTokens": 266,
"latencyMs": 4886.8007919999945
},
{
"questionId": "q80",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1469.287249999994
},
{
"questionId": "q81",
"format": "json",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 9739,
"outputTokens": 139,
"latencyMs": 2891.1199170000036
},
{
"questionId": "q81",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1342.1902079999854
},
{
"questionId": "q81",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 6013,
"outputTokens": 139,
"latencyMs": 2846.046624999988
},
{
"questionId": "q81",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1327.919499999989
},
{
"questionId": "q81",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 6781,
"outputTokens": 139,
"latencyMs": 4302.444041999988
},
{
"questionId": "q81",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1207.6207500000019
},
{
"questionId": "q81",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 9158,
"outputTokens": 267,
"latencyMs": 3389.5046659999934
},
{
"questionId": "q81",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 9289,
"outputTokens": 10,
"latencyMs": 1236.2248340000224
},
{
"questionId": "q81",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 7373,
"outputTokens": 139,
"latencyMs": 2138.4831669999985
},
{
"questionId": "q81",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1233.3828330000106
},
{
"questionId": "q82",
"format": "json",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 9739,
"outputTokens": 138,
"latencyMs": 3346.8621669999848
},
{
"questionId": "q82",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1321.650082999986
},
{
"questionId": "q82",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 6013,
"outputTokens": 138,
"latencyMs": 2395.766499999998
},
{
"questionId": "q82",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1749.51670800001
},
{
"questionId": "q82",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 6781,
"outputTokens": 330,
"latencyMs": 4207.4487500000105
},
{
"questionId": "q82",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1495.846125000011
},
{
"questionId": "q82",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 9158,
"outputTokens": 266,
"latencyMs": 4258.881374999997
},
{
"questionId": "q82",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 9289,
"outputTokens": 10,
"latencyMs": 1113.9782499999856
},
{
"questionId": "q82",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 7373,
"outputTokens": 74,
"latencyMs": 1841.1115829999908
},
{
"questionId": "q82",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1350.6631249999919
},
{
"questionId": "q83",
"format": "json",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 9739,
"outputTokens": 138,
"latencyMs": 2322.9531669999997
},
{
"questionId": "q83",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1556.4763749999984
},
{
"questionId": "q83",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 6013,
"outputTokens": 74,
"latencyMs": 2354.004667000001
},
{
"questionId": "q83",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1314.1952909999818
},
{
"questionId": "q83",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 6781,
"outputTokens": 138,
"latencyMs": 3437.8392080000194
},
{
"questionId": "q83",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1131.0356249999895
},
{
"questionId": "q83",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 9158,
"outputTokens": 138,
"latencyMs": 3209.646000000008
},
{
"questionId": "q83",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1175.6475829999836
},
{
"questionId": "q83",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 7373,
"outputTokens": 266,
"latencyMs": 3785.0792920000094
},
{
"questionId": "q83",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1314.7905420000025
},
{
"questionId": "q84",
"format": "json",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 9739,
"outputTokens": 72,
"latencyMs": 2562.896166999999
},
{
"questionId": "q84",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 3205.178583000001
},
{
"questionId": "q84",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 6013,
"outputTokens": 136,
"latencyMs": 3746.9874170000257
},
{
"questionId": "q84",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1159.280584000022
},
{
"questionId": "q84",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Marvin Thiel",
"correct": false,
"inputTokens": 6781,
"outputTokens": 202,
"latencyMs": 2584.499542000005
},
{
"questionId": "q84",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1249.9375
},
{
"questionId": "q84",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 9158,
"outputTokens": 136,
"latencyMs": 2068.6956669999927
},
{
"questionId": "q84",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1733.235834000021
},
{
"questionId": "q84",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 7373,
"outputTokens": 200,
"latencyMs": 3831.721124999982
},
{
"questionId": "q84",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1311.1745419999934
},
{
"questionId": "q85",
"format": "json",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 9739,
"outputTokens": 139,
"latencyMs": 5464.460791999998
},
{
"questionId": "q85",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1266.8881249999977
},
{
"questionId": "q85",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 6013,
"outputTokens": 203,
"latencyMs": 2957.0821250000154
},
{
"questionId": "q85",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1264.50791700001
},
{
"questionId": "q85",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 6781,
"outputTokens": 331,
"latencyMs": 3740.643666000018
},
{
"questionId": "q85",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1310.5358749999723
},
{
"questionId": "q85",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 9158,
"outputTokens": 139,
"latencyMs": 2979.4539579999982
},
{
"questionId": "q85",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 2026.8683329999913
},
{
"questionId": "q85",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 7373,
"outputTokens": 139,
"latencyMs": 2932.0294159999758
},
{
"questionId": "q85",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1130.2447079999838
},
{
"questionId": "q86",
"format": "json",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 9739,
"outputTokens": 203,
"latencyMs": 2576.945458000002
},
{
"questionId": "q86",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1214.6620409999741
},
{
"questionId": "q86",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 6013,
"outputTokens": 203,
"latencyMs": 3718.371167000005
},
{
"questionId": "q86",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1374.984832999995
},
{
"questionId": "q86",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 6781,
"outputTokens": 139,
"latencyMs": 2313.5867499999877
},
{
"questionId": "q86",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1325.0793330000015
},
{
"questionId": "q86",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 9158,
"outputTokens": 139,
"latencyMs": 2777.8669999999984
},
{
"questionId": "q86",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 1246.2134589999914
},
{
"questionId": "q86",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 7373,
"outputTokens": 75,
"latencyMs": 2246.8254580000066
},
{
"questionId": "q86",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1573.5733749999781
},
{
"questionId": "q87",
"format": "json",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 9739,
"outputTokens": 74,
"latencyMs": 2494.7630000000063
},
{
"questionId": "q87",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1135.412083000003
},
{
"questionId": "q87",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 6013,
"outputTokens": 138,
"latencyMs": 2332.6303330000082
},
{
"questionId": "q87",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1175.6766249999928
},
{
"questionId": "q87",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 6781,
"outputTokens": 458,
"latencyMs": 4252.623416000017
},
{
"questionId": "q87",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1297.546416999976
},
{
"questionId": "q87",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 9158,
"outputTokens": 74,
"latencyMs": 2264.2770829999936
},
{
"questionId": "q87",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1055.0764170000039
},
{
"questionId": "q87",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 7373,
"outputTokens": 138,
"latencyMs": 3193.2753749999974
},
{
"questionId": "q87",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1912.7229999999981
},
{
"questionId": "q88",
"format": "json",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 9739,
"outputTokens": 138,
"latencyMs": 2147.5894160000025
},
{
"questionId": "q88",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1377.5190409999923
},
{
"questionId": "q88",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 6013,
"outputTokens": 202,
"latencyMs": 4472.317459000013
},
{
"questionId": "q88",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1376.0682919999817
},
{
"questionId": "q88",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 6781,
"outputTokens": 202,
"latencyMs": 6952.122459000006
},
{
"questionId": "q88",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1178.8732909999962
},
{
"questionId": "q88",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 9158,
"outputTokens": 266,
"latencyMs": 3619.214917000005
},
{
"questionId": "q88",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 1212.3732920000039
},
{
"questionId": "q88",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 7373,
"outputTokens": 202,
"latencyMs": 5169.327332999994
},
{
"questionId": "q88",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1452.6941670000087
},
{
"questionId": "q89",
"format": "json",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 9739,
"outputTokens": 395,
"latencyMs": 3384.798125000001
},
{
"questionId": "q89",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1241.960665999999
},
{
"questionId": "q89",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 6013,
"outputTokens": 331,
"latencyMs": 4747.914124999981
},
{
"questionId": "q89",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1302.8907080000208
},
{
"questionId": "q89",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 6781,
"outputTokens": 331,
"latencyMs": 3532.4660830000066
},
{
"questionId": "q89",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1203.086540999997
},
{
"questionId": "q89",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 9158,
"outputTokens": 331,
"latencyMs": 4074.5077089999977
},
{
"questionId": "q89",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 9289,
"outputTokens": 10,
"latencyMs": 1345.891499999998
},
{
"questionId": "q89",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 7373,
"outputTokens": 75,
"latencyMs": 1885.0838330000115
},
{
"questionId": "q89",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1182.5891669999983
},
{
"questionId": "q90",
"format": "json",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 9739,
"outputTokens": 140,
"latencyMs": 2772.3258339999884
},
{
"questionId": "q90",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1424.9674579999992
},
{
"questionId": "q90",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 6013,
"outputTokens": 204,
"latencyMs": 2900.4731660000107
},
{
"questionId": "q90",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 2815.817249999993
},
{
"questionId": "q90",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 6781,
"outputTokens": 268,
"latencyMs": 3637.2442089999968
},
{
"questionId": "q90",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1104.2333339999896
},
{
"questionId": "q90",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 9158,
"outputTokens": 396,
"latencyMs": 8213.703791999986
},
{
"questionId": "q90",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 9289,
"outputTokens": 10,
"latencyMs": 2875.9923749999725
},
{
"questionId": "q90",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 7373,
"outputTokens": 140,
"latencyMs": 2809.8342080000148
},
{
"questionId": "q90",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1306.0824999999895
},
{
"questionId": "q91",
"format": "json",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 9739,
"outputTokens": 265,
"latencyMs": 3632.680000000022
},
{
"questionId": "q91",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 11907,
"outputTokens": 5,
"latencyMs": 1446.0535420000087
},
{
"questionId": "q91",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 6013,
"outputTokens": 201,
"latencyMs": 2629.6447500000068
},
{
"questionId": "q91",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 6993,
"outputTokens": 5,
"latencyMs": 1387.298958999978
},
{
"questionId": "q91",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 6781,
"outputTokens": 457,
"latencyMs": 8303.644042
},
{
"questionId": "q91",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 8414,
"outputTokens": 5,
"latencyMs": 1178.2771250000224
},
{
"questionId": "q91",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 9158,
"outputTokens": 329,
"latencyMs": 3967.7135410000046
},
{
"questionId": "q91",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 9289,
"outputTokens": 5,
"latencyMs": 1278.0479160000104
},
{
"questionId": "q91",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 7373,
"outputTokens": 73,
"latencyMs": 1974.7658750000119
},
{
"questionId": "q91",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 8385,
"outputTokens": 5,
"latencyMs": 1496.9746670000022
},
{
"questionId": "q92",
"format": "json",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 9739,
"outputTokens": 201,
"latencyMs": 4246.4962499999965
},
{
"questionId": "q92",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1322.2766660000198
},
{
"questionId": "q92",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 6013,
"outputTokens": 137,
"latencyMs": 2135.097083999979
},
{
"questionId": "q92",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1213.9765000000189
},
{
"questionId": "q92",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 6781,
"outputTokens": 265,
"latencyMs": 3583.0762920000125
},
{
"questionId": "q92",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1353.168249999988
},
{
"questionId": "q92",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 9158,
"outputTokens": 201,
"latencyMs": 3724.366249999992
},
{
"questionId": "q92",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 9289,
"outputTokens": 8,
"latencyMs": 1239.5215000000026
},
{
"questionId": "q92",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 7373,
"outputTokens": 137,
"latencyMs": 2863.772667000012
},
{
"questionId": "q92",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1297.5507919999945
},
{
"questionId": "q93",
"format": "json",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 9739,
"outputTokens": 202,
"latencyMs": 2533.5459160000028
},
{
"questionId": "q93",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1313.4649999999965
},
{
"questionId": "q93",
"format": "toon",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 6013,
"outputTokens": 74,
"latencyMs": 1609.448166999995
},
{
"questionId": "q93",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1257.2229999999981
},
{
"questionId": "q93",
"format": "csv",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 6781,
"outputTokens": 458,
"latencyMs": 5294.154332999984
},
{
"questionId": "q93",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1363.172208999982
},
{
"questionId": "q93",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 9158,
"outputTokens": 74,
"latencyMs": 2154.742499999993
},
{
"questionId": "q93",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 9289,
"outputTokens": 8,
"latencyMs": 1509.8229580000043
},
{
"questionId": "q93",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 7373,
"outputTokens": 74,
"latencyMs": 2010.5185419999762
},
{
"questionId": "q93",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1193.5151659999974
},
{
"questionId": "q94",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9735,
"outputTokens": 1031,
"latencyMs": 9550.510582999996
},
{
"questionId": "q94",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1146.0822499999776
},
{
"questionId": "q94",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6009,
"outputTokens": 775,
"latencyMs": 6479.700542000006
},
{
"questionId": "q94",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1329.610708000022
},
{
"questionId": "q94",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6777,
"outputTokens": 967,
"latencyMs": 15240.216207999998
},
{
"questionId": "q94",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1203.151125000004
},
{
"questionId": "q94",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9154,
"outputTokens": 583,
"latencyMs": 6073.186583000002
},
{
"questionId": "q94",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1452.6655419999734
},
{
"questionId": "q94",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 7369,
"outputTokens": 647,
"latencyMs": 7084.941665999999
},
{
"questionId": "q94",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1120.7099159999925
},
{
"questionId": "q95",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9735,
"outputTokens": 903,
"latencyMs": 8906.334791000001
},
{
"questionId": "q95",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1109.434333000012
},
{
"questionId": "q95",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6009,
"outputTokens": 391,
"latencyMs": 4955.000415999995
},
{
"questionId": "q95",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"correct": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1040.817624999996
},
{
"questionId": "q95",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6777,
"outputTokens": 775,
"latencyMs": 8308.952791000018
},
{
"questionId": "q95",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1128.542833000014
},
{
"questionId": "q95",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9154,
"outputTokens": 775,
"latencyMs": 7118.855291000014
},
{
"questionId": "q95",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1232.1081249999988
},
{
"questionId": "q95",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 7369,
"outputTokens": 647,
"latencyMs": 6776.706208000018
},
{
"questionId": "q95",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1677.1033330000064
},
{
"questionId": "q96",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9736,
"outputTokens": 583,
"latencyMs": 5866.636624999985
},
{
"questionId": "q96",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1574.224125000008
},
{
"questionId": "q96",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6010,
"outputTokens": 711,
"latencyMs": 7998.43637499999
},
{
"questionId": "q96",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"correct": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1175.3050419999927
},
{
"questionId": "q96",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6778,
"outputTokens": 647,
"latencyMs": 6424.974583000003
},
{
"questionId": "q96",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1352.1832500000019
},
{
"questionId": "q96",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9155,
"outputTokens": 647,
"latencyMs": 6132.921792000008
},
{
"questionId": "q96",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1241.7496250000258
},
{
"questionId": "q96",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 7370,
"outputTokens": 455,
"latencyMs": 8074.935457999993
},
{
"questionId": "q96",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"correct": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1294.4225830000069
},
{
"questionId": "q97",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9736,
"outputTokens": 775,
"latencyMs": 7724.665375000011
},
{
"questionId": "q97",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1450.864333000005
},
{
"questionId": "q97",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6010,
"outputTokens": 711,
"latencyMs": 5055.026333999995
},
{
"questionId": "q97",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1177.2059999999765
},
{
"questionId": "q97",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6778,
"outputTokens": 839,
"latencyMs": 7951.241416999983
},
{
"questionId": "q97",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1537.2077500000014
},
{
"questionId": "q97",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9155,
"outputTokens": 519,
"latencyMs": 9752.917709000001
},
{
"questionId": "q97",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1101.1202090000152
},
{
"questionId": "q97",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 7370,
"outputTokens": 647,
"latencyMs": 5711.038375000004
},
{
"questionId": "q97",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1208.3837910000002
},
{
"questionId": "q98",
"format": "json",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9736,
"outputTokens": 775,
"latencyMs": 6578.005040999997
},
{
"questionId": "q98",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1351.4712499999732
},
{
"questionId": "q98",
"format": "toon",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6010,
"outputTokens": 583,
"latencyMs": 6437.821874999994
},
{
"questionId": "q98",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1155.7898750000168
},
{
"questionId": "q98",
"format": "csv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6778,
"outputTokens": 647,
"latencyMs": 6673.183250000002
},
{
"questionId": "q98",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1359.994417000009
},
{
"questionId": "q98",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9155,
"outputTokens": 647,
"latencyMs": 5806.33679099998
},
{
"questionId": "q98",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1339.4869999999937
},
{
"questionId": "q98",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 7370,
"outputTokens": 519,
"latencyMs": 6011.0411669999885
},
{
"questionId": "q98",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1305.6029999999737
},
{
"questionId": "q99",
"format": "json",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "41001.14",
"correct": false,
"inputTokens": 9736,
"outputTokens": 1226,
"latencyMs": 11276.714458000002
},
{
"questionId": "q99",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "48,847.66",
"correct": false,
"inputTokens": 11902,
"outputTokens": 9,
"latencyMs": 1400.5162910000072
},
{
"questionId": "q99",
"format": "toon",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "42342.25",
"correct": true,
"inputTokens": 6010,
"outputTokens": 5962,
"latencyMs": 50971.727667
},
{
"questionId": "q99",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "41,847.47",
"correct": false,
"inputTokens": 6988,
"outputTokens": 9,
"latencyMs": 1118.9986250000075
},
{
"questionId": "q99",
"format": "csv",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "42342.25",
"correct": true,
"inputTokens": 6778,
"outputTokens": 3082,
"latencyMs": 22816.508165999985
},
{
"questionId": "q99",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "48,847.47",
"correct": false,
"inputTokens": 8409,
"outputTokens": 9,
"latencyMs": 1104.31912499998
},
{
"questionId": "q99",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "42425.97",
"correct": false,
"inputTokens": 9155,
"outputTokens": 2762,
"latencyMs": 17412.623583000008
},
{
"questionId": "q99",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "47,847.47",
"correct": false,
"inputTokens": 9284,
"outputTokens": 9,
"latencyMs": 1435.553082999977
},
{
"questionId": "q99",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "42342.25",
"actual": "42342.25",
"correct": true,
"inputTokens": 7370,
"outputTokens": 3402,
"latencyMs": 26299.00112500001
},
{
"questionId": "q99",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "41,847.47",
"correct": false,
"inputTokens": 8380,
"outputTokens": 9,
"latencyMs": 1272.4541250000184
},
{
"questionId": "q100",
"format": "json",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"correct": true,
"inputTokens": 9738,
"outputTokens": 1351,
"latencyMs": 13461.932250000013
},
{
"questionId": "q100",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "48",
"correct": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1772.9891250000219
},
{
"questionId": "q100",
"format": "toon",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"correct": true,
"inputTokens": 6012,
"outputTokens": 1735,
"latencyMs": 14196.807250000013
},
{
"questionId": "q100",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "47",
"correct": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 1749.7322920000006
},
{
"questionId": "q100",
"format": "csv",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"correct": true,
"inputTokens": 6780,
"outputTokens": 1863,
"latencyMs": 14291.044916999992
},
{
"questionId": "q100",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "47",
"correct": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 1453.1822079999838
},
{
"questionId": "q100",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"correct": true,
"inputTokens": 9157,
"outputTokens": 1799,
"latencyMs": 16012.806332999986
},
{
"questionId": "q100",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "48",
"correct": false,
"inputTokens": 9286,
"outputTokens": 5,
"latencyMs": 1761.131041000015
},
{
"questionId": "q100",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "44",
"actual": "44",
"correct": true,
"inputTokens": 7372,
"outputTokens": 1415,
"latencyMs": 12218.14491599999
},
{
"questionId": "q100",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "45",
"correct": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 1255.681917000009
},
{
"questionId": "q101",
"format": "json",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"correct": true,
"inputTokens": 9738,
"outputTokens": 2311,
"latencyMs": 22316.87704199998
},
{
"questionId": "q101",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1090.176792000013
},
{
"questionId": "q101",
"format": "toon",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"correct": true,
"inputTokens": 6012,
"outputTokens": 1095,
"latencyMs": 7211.767082999984
},
{
"questionId": "q101",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 1129.9290000000037
},
{
"questionId": "q101",
"format": "csv",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"correct": true,
"inputTokens": 6780,
"outputTokens": 1415,
"latencyMs": 15701.471499999985
},
{
"questionId": "q101",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 1251.5472500000033
},
{
"questionId": "q101",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"correct": true,
"inputTokens": 9157,
"outputTokens": 1799,
"latencyMs": 16689.30345800001
},
{
"questionId": "q101",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "41",
"correct": false,
"inputTokens": 9286,
"outputTokens": 5,
"latencyMs": 1168.8190419999883
},
{
"questionId": "q101",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "39",
"actual": "39",
"correct": true,
"inputTokens": 7372,
"outputTokens": 1863,
"latencyMs": 14505.393958999979
},
{
"questionId": "q101",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 1149.8783330000006
},
{
"questionId": "q102",
"format": "json",
"model": "gpt-5-nano",
"expected": "32",
"actual": "32",
"correct": true,
"inputTokens": 9738,
"outputTokens": 1607,
"latencyMs": 13945.93979200002
},
{
"questionId": "q102",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"correct": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1175.8143749999872
},
{
"questionId": "q102",
"format": "toon",
"model": "gpt-5-nano",
"expected": "32",
"actual": "32",
"correct": true,
"inputTokens": 6012,
"outputTokens": 1351,
"latencyMs": 11991.764750000002
},
{
"questionId": "q102",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "26",
"correct": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 1643.4279169999936
},
{
"questionId": "q102",
"format": "csv",
"model": "gpt-5-nano",
"expected": "32",
"actual": "32",
"correct": true,
"inputTokens": 6780,
"outputTokens": 1799,
"latencyMs": 17324.695000000007
},
{
"questionId": "q102",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"correct": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 1197.7254160000011
},
{
"questionId": "q102",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "32",
"actual": "32",
"correct": true,
"inputTokens": 9157,
"outputTokens": 1607,
"latencyMs": 22426.01029199999
},
{
"questionId": "q102",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"correct": false,
"inputTokens": 9286,
"outputTokens": 5,
"latencyMs": 1065.6509170000209
},
{
"questionId": "q102",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "32",
"actual": "31",
"correct": false,
"inputTokens": 7372,
"outputTokens": 1543,
"latencyMs": 12786.843416999996
},
{
"questionId": "q102",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "26",
"correct": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 2054.993749999994
},
{
"questionId": "q103",
"format": "json",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 3712,
"outputTokens": 72,
"latencyMs": 2244.986208999995
},
{
"questionId": "q103",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1162.9390420000127
},
{
"questionId": "q103",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 2179.3558330000087
},
{
"questionId": "q103",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1013.4975409999897
},
{
"questionId": "q103",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 1441,
"outputTokens": 72,
"latencyMs": 4859.720833999978
},
{
"questionId": "q103",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1437.758375000005
},
{
"questionId": "q103",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 3829,
"outputTokens": 72,
"latencyMs": 3120.702874999988
},
{
"questionId": "q103",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1051.775708000001
},
{
"questionId": "q103",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 2985,
"outputTokens": 72,
"latencyMs": 2182.880084000004
},
{
"questionId": "q103",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1045.2009580000013
},
{
"questionId": "q104",
"format": "json",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 5291.923750000016
},
{
"questionId": "q104",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1009.6958750000049
},
{
"questionId": "q104",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 1562,
"outputTokens": 74,
"latencyMs": 2582.2320419999887
},
{
"questionId": "q104",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1203.816542000015
},
{
"questionId": "q104",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 2774.835167000012
},
{
"questionId": "q104",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 979.9191669999855
},
{
"questionId": "q104",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 3828,
"outputTokens": 138,
"latencyMs": 2616.684333000012
},
{
"questionId": "q104",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1253.4844169999997
},
{
"questionId": "q104",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 2984,
"outputTokens": 74,
"latencyMs": 2267.1155000000144
},
{
"questionId": "q104",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1185.4212080000143
},
{
"questionId": "q105",
"format": "json",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 3712,
"outputTokens": 136,
"latencyMs": 2905.6011250000156
},
{
"questionId": "q105",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1571.1469999999972
},
{
"questionId": "q105",
"format": "toon",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 1563,
"outputTokens": 328,
"latencyMs": 3884.65858399999
},
{
"questionId": "q105",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1207.1518330000108
},
{
"questionId": "q105",
"format": "csv",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 1441,
"outputTokens": 72,
"latencyMs": 1995.0557919999992
},
{
"questionId": "q105",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1238.8113749999902
},
{
"questionId": "q105",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 3829,
"outputTokens": 136,
"latencyMs": 5824.06574999998
},
{
"questionId": "q105",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1337.474749999994
},
{
"questionId": "q105",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 2286.1839580000087
},
{
"questionId": "q105",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1326.3640000000014
},
{
"questionId": "q106",
"format": "json",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 3801.309249999991
},
{
"questionId": "q106",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1054.8991249999963
},
{
"questionId": "q106",
"format": "toon",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 1562,
"outputTokens": 74,
"latencyMs": 3338.1347499999974
},
{
"questionId": "q106",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1393.589082999999
},
{
"questionId": "q106",
"format": "csv",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 1440,
"outputTokens": 202,
"latencyMs": 3719.6092089999875
},
{
"questionId": "q106",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1030.9656669999822
},
{
"questionId": "q106",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 3828,
"outputTokens": 74,
"latencyMs": 2226.628250000009
},
{
"questionId": "q106",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1154.132540999999
},
{
"questionId": "q106",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2922.2590830000117
},
{
"questionId": "q106",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 2048.011916999996
},
{
"questionId": "q107",
"format": "json",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 3712,
"outputTokens": 200,
"latencyMs": 2520.5313329999917
},
{
"questionId": "q107",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 943.3422089999949
},
{
"questionId": "q107",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 2300.8406249999825
},
{
"questionId": "q107",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1128.4146670000046
},
{
"questionId": "q107",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 1441,
"outputTokens": 200,
"latencyMs": 2929.585208000004
},
{
"questionId": "q107",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1230.4635420000122
},
{
"questionId": "q107",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 3829,
"outputTokens": 136,
"latencyMs": 3650.3654169999936
},
{
"questionId": "q107",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 985.8184590000019
},
{
"questionId": "q107",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 2985,
"outputTokens": 328,
"latencyMs": 3772.2553330000082
},
{
"questionId": "q107",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1311.8630419999827
},
{
"questionId": "q108",
"format": "json",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 2935.785124999995
},
{
"questionId": "q108",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1391.9168749999953
},
{
"questionId": "q108",
"format": "toon",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 1562,
"outputTokens": 138,
"latencyMs": 5759.15529200001
},
{
"questionId": "q108",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1064.3980420000153
},
{
"questionId": "q108",
"format": "csv",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 1440,
"outputTokens": 74,
"latencyMs": 3640.193708000006
},
{
"questionId": "q108",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 983.806166000024
},
{
"questionId": "q108",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 3828,
"outputTokens": 266,
"latencyMs": 2604.2135000000126
},
{
"questionId": "q108",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1128.6182499999995
},
{
"questionId": "q108",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2548.5608749999956
},
{
"questionId": "q108",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1029.5365000000165
},
{
"questionId": "q109",
"format": "json",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 3712,
"outputTokens": 136,
"latencyMs": 3983.6009170000034
},
{
"questionId": "q109",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1095.2366250000196
},
{
"questionId": "q109",
"format": "toon",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 1563,
"outputTokens": 72,
"latencyMs": 2207.884417000023
},
{
"questionId": "q109",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 2292.4111660000053
},
{
"questionId": "q109",
"format": "csv",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 1441,
"outputTokens": 136,
"latencyMs": 2749.430541000009
},
{
"questionId": "q109",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1215.8329999999842
},
{
"questionId": "q109",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 3829,
"outputTokens": 136,
"latencyMs": 2086.6161659999925
},
{
"questionId": "q109",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1299.715790999995
},
{
"questionId": "q109",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 7107.394916999998
},
{
"questionId": "q109",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 899.2319579999894
},
{
"questionId": "q110",
"format": "json",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 2810.5213330000115
},
{
"questionId": "q110",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 989.2326659999962
},
{
"questionId": "q110",
"format": "toon",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 1562,
"outputTokens": 138,
"latencyMs": 2622.7841670000053
},
{
"questionId": "q110",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 850.1227920000092
},
{
"questionId": "q110",
"format": "csv",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 3057.1578750000044
},
{
"questionId": "q110",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1261.3340000000026
},
{
"questionId": "q110",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 3828,
"outputTokens": 202,
"latencyMs": 3061.791499999992
},
{
"questionId": "q110",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1196.6509999999835
},
{
"questionId": "q110",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 3567.4540839999972
},
{
"questionId": "q110",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1033.8556249999965
},
{
"questionId": "q111",
"format": "json",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 3712,
"outputTokens": 136,
"latencyMs": 2842.961707999988
},
{
"questionId": "q111",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1258.130582999991
},
{
"questionId": "q111",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 1563,
"outputTokens": 456,
"latencyMs": 5828.652415999997
},
{
"questionId": "q111",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1004.821958000015
},
{
"questionId": "q111",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 1441,
"outputTokens": 72,
"latencyMs": 3102.38612499999
},
{
"questionId": "q111",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1454.8658750000177
},
{
"questionId": "q111",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 3829,
"outputTokens": 136,
"latencyMs": 2018.8434999999881
},
{
"questionId": "q111",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1237.4057080000057
},
{
"questionId": "q111",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 3670.7451670000155
},
{
"questionId": "q111",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1070.646584000002
},
{
"questionId": "q112",
"format": "json",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 3711,
"outputTokens": 202,
"latencyMs": 3731.3879579999775
},
{
"questionId": "q112",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1387.9798329999903
},
{
"questionId": "q112",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 1562,
"outputTokens": 394,
"latencyMs": 5560.397957999987
},
{
"questionId": "q112",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1552.963958999986
},
{
"questionId": "q112",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 21759.84366700001
},
{
"questionId": "q112",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1132.519083000021
},
{
"questionId": "q112",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 3828,
"outputTokens": 138,
"latencyMs": 2277.2652499999967
},
{
"questionId": "q112",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1098.0825420000183
},
{
"questionId": "q112",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 2984,
"outputTokens": 202,
"latencyMs": 2813.10504200001
},
{
"questionId": "q112",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1131.9674159999995
},
{
"questionId": "q113",
"format": "json",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 3712,
"outputTokens": 136,
"latencyMs": 6657.446207999979
},
{
"questionId": "q113",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1265.4548749999958
},
{
"questionId": "q113",
"format": "toon",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 3299.298792000016
},
{
"questionId": "q113",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1618.5091249999823
},
{
"questionId": "q113",
"format": "csv",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 1441,
"outputTokens": 136,
"latencyMs": 5353.29241699999
},
{
"questionId": "q113",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 870.5113749999728
},
{
"questionId": "q113",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 3829,
"outputTokens": 200,
"latencyMs": 2780.5659159999923
},
{
"questionId": "q113",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1069.2415409999958
},
{
"questionId": "q113",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 2985,
"outputTokens": 200,
"latencyMs": 3036.145666999975
},
{
"questionId": "q113",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1252.9633329999924
},
{
"questionId": "q114",
"format": "json",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 2617.047249999974
},
{
"questionId": "q114",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1261.9117079999996
},
{
"questionId": "q114",
"format": "toon",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 1562,
"outputTokens": 202,
"latencyMs": 6192.06358300001
},
{
"questionId": "q114",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1158.3806249999907
},
{
"questionId": "q114",
"format": "csv",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 2867.840083999996
},
{
"questionId": "q114",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 856.2939580000238
},
{
"questionId": "q114",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 3828,
"outputTokens": 138,
"latencyMs": 2329.6339579999913
},
{
"questionId": "q114",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1106.5591669999994
},
{
"questionId": "q114",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2590.7533330000006
},
{
"questionId": "q114",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1007.0892920000188
},
{
"questionId": "q115",
"format": "json",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 3712,
"outputTokens": 200,
"latencyMs": 3839.2745000000286
},
{
"questionId": "q115",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1388.2399160000205
},
{
"questionId": "q115",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 1563,
"outputTokens": 200,
"latencyMs": 3955.22095800002
},
{
"questionId": "q115",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1036.567458000005
},
{
"questionId": "q115",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 1441,
"outputTokens": 200,
"latencyMs": 5566.705209000007
},
{
"questionId": "q115",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1078.5011670000094
},
{
"questionId": "q115",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 3829,
"outputTokens": 200,
"latencyMs": 2956.9618330000376
},
{
"questionId": "q115",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1797.4496250000084
},
{
"questionId": "q115",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 2647.741832999978
},
{
"questionId": "q115",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1221.9055410000146
},
{
"questionId": "q116",
"format": "json",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 3783.334333000006
},
{
"questionId": "q116",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.30",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1135.7771670000511
},
{
"questionId": "q116",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 1562,
"outputTokens": 266,
"latencyMs": 3364.4232920000213
},
{
"questionId": "q116",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1161.263666999992
},
{
"questionId": "q116",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 1440,
"outputTokens": 74,
"latencyMs": 3646.0659589999705
},
{
"questionId": "q116",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 955.7597500000265
},
{
"questionId": "q116",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 3828,
"outputTokens": 74,
"latencyMs": 2345.2203750000335
},
{
"questionId": "q116",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1541.918249999988
},
{
"questionId": "q116",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 6126.976708000002
},
{
"questionId": "q116",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1097.440709000046
},
{
"questionId": "q117",
"format": "json",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 3712,
"outputTokens": 264,
"latencyMs": 3404.643708999967
},
{
"questionId": "q117",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1227.7047499999753
},
{
"questionId": "q117",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 1563,
"outputTokens": 136,
"latencyMs": 2495.85037499998
},
{
"questionId": "q117",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1048.344832999981
},
{
"questionId": "q117",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 1441,
"outputTokens": 136,
"latencyMs": 3007.2462499999674
},
{
"questionId": "q117",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 840.0351669999654
},
{
"questionId": "q117",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 3829,
"outputTokens": 328,
"latencyMs": 3149.872374999977
},
{
"questionId": "q117",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 973.716167000006
},
{
"questionId": "q117",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 2985,
"outputTokens": 456,
"latencyMs": 5305.827791999967
},
{
"questionId": "q117",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 953.3122500000172
},
{
"questionId": "q118",
"format": "json",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 3435.850167000026
},
{
"questionId": "q118",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1110.8856249999953
},
{
"questionId": "q118",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 1562,
"outputTokens": 266,
"latencyMs": 3303.3427500000107
},
{
"questionId": "q118",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 954.5857910000486
},
{
"questionId": "q118",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 1440,
"outputTokens": 138,
"latencyMs": 5035.666582999984
},
{
"questionId": "q118",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 867.9529159999802
},
{
"questionId": "q118",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 3828,
"outputTokens": 202,
"latencyMs": 2817.1118750000023
},
{
"questionId": "q118",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1029.4406660000095
},
{
"questionId": "q118",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2521.28145900002
},
{
"questionId": "q118",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1266.9695000000065
},
{
"questionId": "q119",
"format": "json",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 3712,
"outputTokens": 72,
"latencyMs": 2383.6225830000476
},
{
"questionId": "q119",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1100.3007499999949
},
{
"questionId": "q119",
"format": "toon",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 1563,
"outputTokens": 200,
"latencyMs": 2816.252374999982
},
{
"questionId": "q119",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1030.0248330000322
},
{
"questionId": "q119",
"format": "csv",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 1441,
"outputTokens": 72,
"latencyMs": 1819.5161669999943
},
{
"questionId": "q119",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1012.0581670000101
},
{
"questionId": "q119",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 3829,
"outputTokens": 136,
"latencyMs": 2960.8910000000033
},
{
"questionId": "q119",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1346.7110000000102
},
{
"questionId": "q119",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 3081.40625
},
{
"questionId": "q119",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1485.0133330000099
},
{
"questionId": "q120",
"format": "json",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 3711,
"outputTokens": 138,
"latencyMs": 3632.860875000013
},
{
"questionId": "q120",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1224.803750000021
},
{
"questionId": "q120",
"format": "toon",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 1562,
"outputTokens": 138,
"latencyMs": 2323.675958000007
},
{
"questionId": "q120",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1114.0831669999752
},
{
"questionId": "q120",
"format": "csv",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 1440,
"outputTokens": 202,
"latencyMs": 3465.111333000008
},
{
"questionId": "q120",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1082.4990419999813
},
{
"questionId": "q120",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 3828,
"outputTokens": 138,
"latencyMs": 5648.285415999999
},
{
"questionId": "q120",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1087.8757500000065
},
{
"questionId": "q120",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 4587.399166000017
},
{
"questionId": "q120",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1007.4333340000012
},
{
"questionId": "q121",
"format": "json",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 3712,
"outputTokens": 72,
"latencyMs": 2307.9398339999607
},
{
"questionId": "q121",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 2368.3719580000034
},
{
"questionId": "q121",
"format": "toon",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 1563,
"outputTokens": 200,
"latencyMs": 3587.720166999963
},
{
"questionId": "q121",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1053.9867080000113
},
{
"questionId": "q121",
"format": "csv",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 1441,
"outputTokens": 136,
"latencyMs": 1593.4699169999803
},
{
"questionId": "q121",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 2256.4729170000064
},
{
"questionId": "q121",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 3829,
"outputTokens": 200,
"latencyMs": 4466.158916999993
},
{
"questionId": "q121",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1305.1236670000362
},
{
"questionId": "q121",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 2985,
"outputTokens": 136,
"latencyMs": 3014.9748339999933
},
{
"questionId": "q121",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1421.9597920000087
},
{
"questionId": "q122",
"format": "json",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 3711,
"outputTokens": 202,
"latencyMs": 19503.25695900002
},
{
"questionId": "q122",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1164.002959000005
},
{
"questionId": "q122",
"format": "toon",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 1562,
"outputTokens": 330,
"latencyMs": 4662.637042000017
},
{
"questionId": "q122",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1086.9569170000032
},
{
"questionId": "q122",
"format": "csv",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 1440,
"outputTokens": 202,
"latencyMs": 2683.73904200003
},
{
"questionId": "q122",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 2289.0300419999985
},
{
"questionId": "q122",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 3828,
"outputTokens": 74,
"latencyMs": 1877.1760409999988
},
{
"questionId": "q122",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1460.1729160000104
},
{
"questionId": "q122",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 2984,
"outputTokens": 138,
"latencyMs": 2582.983708999993
},
{
"questionId": "q122",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1014.1320839999826
},
{
"questionId": "q123",
"format": "json",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"correct": true,
"inputTokens": 3709,
"outputTokens": 2376,
"latencyMs": 26290.846458000015
},
{
"questionId": "q123",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "188,945",
"correct": false,
"inputTokens": 4077,
"outputTokens": 7,
"latencyMs": 1288.6627500000177
},
{
"questionId": "q123",
"format": "toon",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"correct": true,
"inputTokens": 1560,
"outputTokens": 1736,
"latencyMs": 13565.930124999955
},
{
"questionId": "q123",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "337,045",
"correct": false,
"inputTokens": 1506,
"outputTokens": 7,
"latencyMs": 1190.8501249999972
},
{
"questionId": "q123",
"format": "csv",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"correct": true,
"inputTokens": 1438,
"outputTokens": 2888,
"latencyMs": 21377.612083000015
},
{
"questionId": "q123",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "372,915",
"correct": false,
"inputTokens": 1442,
"outputTokens": 7,
"latencyMs": 931.349749999994
},
{
"questionId": "q123",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"correct": true,
"inputTokens": 3826,
"outputTokens": 3208,
"latencyMs": 18997.804958999972
},
{
"questionId": "q123",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "188,647",
"correct": false,
"inputTokens": 3412,
"outputTokens": 7,
"latencyMs": 1185.3518330000225
},
{
"questionId": "q123",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "344498",
"actual": "344498",
"correct": true,
"inputTokens": 2982,
"outputTokens": 2184,
"latencyMs": 23924.366792000015
},
{
"questionId": "q123",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "181,854",
"correct": false,
"inputTokens": 3107,
"outputTokens": 7,
"latencyMs": 2958.913666999957
},
{
"questionId": "q124",
"format": "json",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"correct": true,
"inputTokens": 3707,
"outputTokens": 4170,
"latencyMs": 29361.525874999992
},
{
"questionId": "q124",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "287,745.89",
"correct": false,
"inputTokens": 4075,
"outputTokens": 9,
"latencyMs": 1325.5311249999795
},
{
"questionId": "q124",
"format": "toon",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"correct": true,
"inputTokens": 1558,
"outputTokens": 4106,
"latencyMs": 37997.09958400001
},
{
"questionId": "q124",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "487,891.45",
"correct": false,
"inputTokens": 1504,
"outputTokens": 9,
"latencyMs": 1184.0957090000156
},
{
"questionId": "q124",
"format": "csv",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"correct": true,
"inputTokens": 1436,
"outputTokens": 3658,
"latencyMs": 26945.63508400001
},
{
"questionId": "q124",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "487,891.89",
"correct": false,
"inputTokens": 1440,
"outputTokens": 9,
"latencyMs": 1162.16949999996
},
{
"questionId": "q124",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"correct": true,
"inputTokens": 3824,
"outputTokens": 3722,
"latencyMs": 27321.698167000024
},
{
"questionId": "q124",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "381,968.89",
"correct": false,
"inputTokens": 3410,
"outputTokens": 9,
"latencyMs": 2065.7583339999546
},
{
"questionId": "q124",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "312818.50",
"actual": "312818.50",
"correct": true,
"inputTokens": 2980,
"outputTokens": 3658,
"latencyMs": 28778.99891600001
},
{
"questionId": "q124",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "381,847.89",
"correct": false,
"inputTokens": 3105,
"outputTokens": 9,
"latencyMs": 1233.4267090000212
},
{
"questionId": "q125",
"format": "json",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"correct": true,
"inputTokens": 3709,
"outputTokens": 2568,
"latencyMs": 28626.692666999996
},
{
"questionId": "q125",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,234",
"correct": false,
"inputTokens": 4078,
"outputTokens": 7,
"latencyMs": 1133.735584000009
},
{
"questionId": "q125",
"format": "toon",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"correct": true,
"inputTokens": 1560,
"outputTokens": 1672,
"latencyMs": 14898.688125000044
},
{
"questionId": "q125",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,945",
"correct": false,
"inputTokens": 1507,
"outputTokens": 7,
"latencyMs": 1178.2744999999995
},
{
"questionId": "q125",
"format": "csv",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"correct": true,
"inputTokens": 1438,
"outputTokens": 1864,
"latencyMs": 15225.964540999965
},
{
"questionId": "q125",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,945",
"correct": false,
"inputTokens": 1443,
"outputTokens": 7,
"latencyMs": 1077.2695419999654
},
{
"questionId": "q125",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"correct": true,
"inputTokens": 3826,
"outputTokens": 1928,
"latencyMs": 14057.434583000024
},
{
"questionId": "q125",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,454",
"correct": false,
"inputTokens": 3413,
"outputTokens": 7,
"latencyMs": 1177.537500000035
},
{
"questionId": "q125",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "1811",
"actual": "1811",
"correct": true,
"inputTokens": 2982,
"outputTokens": 2312,
"latencyMs": 19125.74099999998
},
{
"questionId": "q125",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,454",
"correct": false,
"inputTokens": 3108,
"outputTokens": 7,
"latencyMs": 1047.243833000015
},
{
"questionId": "q126",
"format": "json",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 3709,
"outputTokens": 1735,
"latencyMs": 14875.021707999986
},
{
"questionId": "q126",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1076.5694999999832
},
{
"questionId": "q126",
"format": "toon",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 1560,
"outputTokens": 2823,
"latencyMs": 22604.422416999994
},
{
"questionId": "q126",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1451.705666999973
},
{
"questionId": "q126",
"format": "csv",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 1438,
"outputTokens": 2183,
"latencyMs": 16916.007042000012
},
{
"questionId": "q126",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1103.1098750000237
},
{
"questionId": "q126",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 3826,
"outputTokens": 2055,
"latencyMs": 17162.629124999978
},
{
"questionId": "q126",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "47",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1150.0435000000289
},
{
"questionId": "q126",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 2982,
"outputTokens": 1607,
"latencyMs": 14835.323333000008
},
{
"questionId": "q126",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "47",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1206.8219590000226
},
{
"questionId": "q127",
"format": "json",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"correct": true,
"inputTokens": 3709,
"outputTokens": 1479,
"latencyMs": 11560.967958000023
},
{
"questionId": "q127",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "24",
"correct": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1151.9984169999952
},
{
"questionId": "q127",
"format": "toon",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"correct": true,
"inputTokens": 1560,
"outputTokens": 1927,
"latencyMs": 15431.08262499998
},
{
"questionId": "q127",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "26",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1032.7485419999575
},
{
"questionId": "q127",
"format": "csv",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"correct": true,
"inputTokens": 1438,
"outputTokens": 1607,
"latencyMs": 9425.883957999991
},
{
"questionId": "q127",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "23",
"correct": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 943.5942919999943
},
{
"questionId": "q127",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"correct": true,
"inputTokens": 3826,
"outputTokens": 1927,
"latencyMs": 16529.66529199999
},
{
"questionId": "q127",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "24",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1107.5635419999599
},
{
"questionId": "q127",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "28",
"actual": "28",
"correct": true,
"inputTokens": 2982,
"outputTokens": 1863,
"latencyMs": 21071.067082999973
},
{
"questionId": "q127",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "23",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1018.46212500002
},
{
"questionId": "q128",
"format": "json",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 3709,
"outputTokens": 1223,
"latencyMs": 8242.37608300004
},
{
"questionId": "q128",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1052.7201249999925
},
{
"questionId": "q128",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 1560,
"outputTokens": 903,
"latencyMs": 5430.806291999994
},
{
"questionId": "q128",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "12",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 2354.328999999969
},
{
"questionId": "q128",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 1438,
"outputTokens": 1607,
"latencyMs": 21944.211458000005
},
{
"questionId": "q128",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1249.9959590000217
},
{
"questionId": "q128",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 3826,
"outputTokens": 1415,
"latencyMs": 15465.409875000012
},
{
"questionId": "q128",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1131.9575830000103
},
{
"questionId": "q128",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 2982,
"outputTokens": 2503,
"latencyMs": 24744.971958999988
},
{
"questionId": "q128",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1274.6952499999898
},
{
"questionId": "q129",
"format": "json",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"correct": true,
"inputTokens": 3708,
"outputTokens": 1351,
"latencyMs": 12546.867542000022
},
{
"questionId": "q129",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "50",
"correct": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1231.453749999986
},
{
"questionId": "q129",
"format": "toon",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"correct": true,
"inputTokens": 1559,
"outputTokens": 1543,
"latencyMs": 16593.402166999993
},
{
"questionId": "q129",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "47",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1079.0991659999709
},
{
"questionId": "q129",
"format": "csv",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"correct": true,
"inputTokens": 1437,
"outputTokens": 1543,
"latencyMs": 10956.456084000005
},
{
"questionId": "q129",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "54",
"correct": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 2018.3774170000106
},
{
"questionId": "q129",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"correct": true,
"inputTokens": 3825,
"outputTokens": 1351,
"latencyMs": 10537.598500000022
},
{
"questionId": "q129",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "47",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1039.2452080000076
},
{
"questionId": "q129",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "58",
"actual": "58",
"correct": true,
"inputTokens": 2981,
"outputTokens": 839,
"latencyMs": 8039.237708000001
},
{
"questionId": "q129",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "54",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1264.6740829999908
},
{
"questionId": "q130",
"format": "json",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 3708,
"outputTokens": 1863,
"latencyMs": 14310.697374999989
},
{
"questionId": "q130",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1138.4443339999998
},
{
"questionId": "q130",
"format": "toon",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 1559,
"outputTokens": 1927,
"latencyMs": 16487.508375000034
},
{
"questionId": "q130",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "38",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1104.2365410000202
},
{
"questionId": "q130",
"format": "csv",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 1437,
"outputTokens": 3015,
"latencyMs": 23688.737208999984
},
{
"questionId": "q130",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "38",
"correct": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1026.8166249999776
},
{
"questionId": "q130",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 3825,
"outputTokens": 1671,
"latencyMs": 12415.87070899998
},
{
"questionId": "q130",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1062.2278749999823
},
{
"questionId": "q130",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "41",
"actual": "41",
"correct": true,
"inputTokens": 2981,
"outputTokens": 1799,
"latencyMs": 15901.829415999993
},
{
"questionId": "q130",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1051.6962910000002
},
{
"questionId": "q131",
"format": "json",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"correct": true,
"inputTokens": 3708,
"outputTokens": 1863,
"latencyMs": 15216.926500000001
},
{
"questionId": "q131",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"correct": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1460.9212079999852
},
{
"questionId": "q131",
"format": "toon",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"correct": true,
"inputTokens": 1559,
"outputTokens": 2567,
"latencyMs": 27103.083999999973
},
{
"questionId": "q131",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1101.5416669999831
},
{
"questionId": "q131",
"format": "csv",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"correct": true,
"inputTokens": 1437,
"outputTokens": 1543,
"latencyMs": 14598.558207999973
},
{
"questionId": "q131",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"correct": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1270.7722910000011
},
{
"questionId": "q131",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"correct": true,
"inputTokens": 3825,
"outputTokens": 1415,
"latencyMs": 14102.604708999977
},
{
"questionId": "q131",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "21",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1251.4159170000348
},
{
"questionId": "q131",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "23",
"actual": "23",
"correct": true,
"inputTokens": 2981,
"outputTokens": 1799,
"latencyMs": 18696.684999999998
},
{
"questionId": "q131",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "21",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1170.9401669999934
},
{
"questionId": "q132",
"format": "json",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 15187,
"outputTokens": 136,
"latencyMs": 2872.1482499999693
},
{
"questionId": "q132",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 17409,
"outputTokens": 6,
"latencyMs": 1382.586333000043
},
{
"questionId": "q132",
"format": "toon",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 8788,
"outputTokens": 904,
"latencyMs": 9130.657125000027
},
{
"questionId": "q132",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 9279,
"outputTokens": 6,
"latencyMs": 1164.3372080000117
},
{
"questionId": "q132",
"format": "csv",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 8556,
"outputTokens": 648,
"latencyMs": 7763.659999999974
},
{
"questionId": "q132",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 9125,
"outputTokens": 6,
"latencyMs": 1331.3139999999548
},
{
"questionId": "q132",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 15481,
"outputTokens": 584,
"latencyMs": 9411.661499999987
},
{
"questionId": "q132",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 15367,
"outputTokens": 6,
"latencyMs": 1272.1991249999846
},
{
"questionId": "q132",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 13171,
"outputTokens": 200,
"latencyMs": 3587.8712090000045
},
{
"questionId": "q132",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 14483,
"outputTokens": 6,
"latencyMs": 1710.5899999999674
},
{
"questionId": "q133",
"format": "json",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 15189,
"outputTokens": 328,
"latencyMs": 3625.780167000019
},
{
"questionId": "q133",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 17410,
"outputTokens": 6,
"latencyMs": 1785.2782080000034
},
{
"questionId": "q133",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 8790,
"outputTokens": 712,
"latencyMs": 6381.770374999964
},
{
"questionId": "q133",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 9280,
"outputTokens": 6,
"latencyMs": 1352.5436660000123
},
{
"questionId": "q133",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 8558,
"outputTokens": 520,
"latencyMs": 27916.417874999985
},
{
"questionId": "q133",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 9126,
"outputTokens": 6,
"latencyMs": 2073.8068330000388
},
{
"questionId": "q133",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 15483,
"outputTokens": 328,
"latencyMs": 5943.872542000026
},
{
"questionId": "q133",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 15368,
"outputTokens": 6,
"latencyMs": 1767.4393339999951
},
{
"questionId": "q133",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 13173,
"outputTokens": 264,
"latencyMs": 3115.895124999981
},
{
"questionId": "q133",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 14484,
"outputTokens": 6,
"latencyMs": 1183.2249999999767
},
{
"questionId": "q134",
"format": "json",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 15192,
"outputTokens": 392,
"latencyMs": 4991.646125000028
},
{
"questionId": "q134",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1835.4077919999836
},
{
"questionId": "q134",
"format": "toon",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 8793,
"outputTokens": 712,
"latencyMs": 7788.013291999989
},
{
"questionId": "q134",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 1082.4066669999738
},
{
"questionId": "q134",
"format": "csv",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 8561,
"outputTokens": 520,
"latencyMs": 5664.896500000032
},
{
"questionId": "q134",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1215.8875830000034
},
{
"questionId": "q134",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 15486,
"outputTokens": 456,
"latencyMs": 5141.449292000034
},
{
"questionId": "q134",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 15370,
"outputTokens": 6,
"latencyMs": 1483.2090420000022
},
{
"questionId": "q134",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 13176,
"outputTokens": 328,
"latencyMs": 7532.760624999995
},
{
"questionId": "q134",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1458.0657500000088
},
{
"questionId": "q135",
"format": "json",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 15191,
"outputTokens": 392,
"latencyMs": 7922.4705829999875
},
{
"questionId": "q135",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1510.0054579999996
},
{
"questionId": "q135",
"format": "toon",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 8792,
"outputTokens": 776,
"latencyMs": 8475.77466699999
},
{
"questionId": "q135",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 1203.3620419999934
},
{
"questionId": "q135",
"format": "csv",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 8560,
"outputTokens": 776,
"latencyMs": 7283.84258300002
},
{
"questionId": "q135",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1365.2434169999906
},
{
"questionId": "q135",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 15485,
"outputTokens": 520,
"latencyMs": 5846.538916999998
},
{
"questionId": "q135",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 15370,
"outputTokens": 6,
"latencyMs": 1203.6220829999656
},
{
"questionId": "q135",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 13175,
"outputTokens": 456,
"latencyMs": 5973.848832999996
},
{
"questionId": "q135",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1189.811875000014
},
{
"questionId": "q136",
"format": "json",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 15187,
"outputTokens": 328,
"latencyMs": 8872.252957999997
},
{
"questionId": "q136",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 1775.476083000016
},
{
"questionId": "q136",
"format": "toon",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 8788,
"outputTokens": 648,
"latencyMs": 7149.649291000038
},
{
"questionId": "q136",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1577.2079999999842
},
{
"questionId": "q136",
"format": "csv",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 8556,
"outputTokens": 1288,
"latencyMs": 11344.462834000005
},
{
"questionId": "q136",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 1340.27887499996
},
{
"questionId": "q136",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 15481,
"outputTokens": 392,
"latencyMs": 6256.696250000037
},
{
"questionId": "q136",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 15365,
"outputTokens": 6,
"latencyMs": 1604.6909999999916
},
{
"questionId": "q136",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 13171,
"outputTokens": 456,
"latencyMs": 5982.022666999954
},
{
"questionId": "q136",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1259.2409589999588
},
{
"questionId": "q137",
"format": "json",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 15186,
"outputTokens": 200,
"latencyMs": 2858.1693749999977
},
{
"questionId": "q137",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 1786.5725000000093
},
{
"questionId": "q137",
"format": "toon",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 8787,
"outputTokens": 2696,
"latencyMs": 23868.72975
},
{
"questionId": "q137",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1116.0275000000256
},
{
"questionId": "q137",
"format": "csv",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "0",
"correct": false,
"inputTokens": 8555,
"outputTokens": 1543,
"latencyMs": 17006.341916999954
},
{
"questionId": "q137",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1425.7799160000286
},
{
"questionId": "q137",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 15480,
"outputTokens": 648,
"latencyMs": 8414.583791000012
},
{
"questionId": "q137",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 15366,
"outputTokens": 6,
"latencyMs": 1374.9217920000083
},
{
"questionId": "q137",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 13170,
"outputTokens": 456,
"latencyMs": 6113.31808300002
},
{
"questionId": "q137",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1374.9246660000063
},
{
"questionId": "q138",
"format": "json",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 15186,
"outputTokens": 392,
"latencyMs": 5410.596499999985
},
{
"questionId": "q138",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 1607.6261659999727
},
{
"questionId": "q138",
"format": "toon",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 8787,
"outputTokens": 520,
"latencyMs": 6469.81479199999
},
{
"questionId": "q138",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1103.9521250000107
},
{
"questionId": "q138",
"format": "csv",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 8555,
"outputTokens": 904,
"latencyMs": 8993.236791000003
},
{
"questionId": "q138",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 1118.0249590000021
},
{
"questionId": "q138",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 15480,
"outputTokens": 392,
"latencyMs": 4705.902084000001
},
{
"questionId": "q138",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 15365,
"outputTokens": 6,
"latencyMs": 1454.1250839999993
},
{
"questionId": "q138",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 13170,
"outputTokens": 456,
"latencyMs": 5041.734750000003
},
{
"questionId": "q138",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1199.9473330000183
},
{
"questionId": "q139",
"format": "json",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 15193,
"outputTokens": 328,
"latencyMs": 4364.900083000015
},
{
"questionId": "q139",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1320.7056250000023
},
{
"questionId": "q139",
"format": "toon",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 8794,
"outputTokens": 904,
"latencyMs": 8590.36599999998
},
{
"questionId": "q139",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 1166.0237089999719
},
{
"questionId": "q139",
"format": "csv",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 8562,
"outputTokens": 648,
"latencyMs": 6442.057417000004
},
{
"questionId": "q139",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1342.8652910000528
},
{
"questionId": "q139",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 15487,
"outputTokens": 264,
"latencyMs": 4450.340833000024
},
{
"questionId": "q139",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 15370,
"outputTokens": 6,
"latencyMs": 1551.4001249999856
},
{
"questionId": "q139",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 13177,
"outputTokens": 520,
"latencyMs": 5858.679374999949
},
{
"questionId": "q139",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1173.6422499999753
},
{
"questionId": "q140",
"format": "json",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 15185,
"outputTokens": 456,
"latencyMs": 6377.878708000004
},
{
"questionId": "q140",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1312.9188750000321
},
{
"questionId": "q140",
"format": "toon",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 8786,
"outputTokens": 4680,
"latencyMs": 36395.80937499995
},
{
"questionId": "q140",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 2024.6539580000099
},
{
"questionId": "q140",
"format": "csv",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 8554,
"outputTokens": 3784,
"latencyMs": 30336.309707999986
},
{
"questionId": "q140",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1237.6976249999716
},
{
"questionId": "q140",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 15479,
"outputTokens": 264,
"latencyMs": 5297.444375000021
},
{
"questionId": "q140",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 15363,
"outputTokens": 6,
"latencyMs": 1775.3334170000162
},
{
"questionId": "q140",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 13169,
"outputTokens": 392,
"latencyMs": 8030.958958000003
},
{
"questionId": "q140",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1401.1453330000513
},
{
"questionId": "q141",
"format": "json",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 15187,
"outputTokens": 264,
"latencyMs": 6193.845583000046
},
{
"questionId": "q141",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 2449.4082920000073
},
{
"questionId": "q141",
"format": "toon",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 8788,
"outputTokens": 2568,
"latencyMs": 25386.850749999983
},
{
"questionId": "q141",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1351.401165999996
},
{
"questionId": "q141",
"format": "csv",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 8556,
"outputTokens": 456,
"latencyMs": 5087.453167000029
},
{
"questionId": "q141",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1229.4187500000116
},
{
"questionId": "q141",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 15481,
"outputTokens": 520,
"latencyMs": 6781.348249999981
},
{
"questionId": "q141",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 15366,
"outputTokens": 6,
"latencyMs": 1411.0081670000218
},
{
"questionId": "q141",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 13171,
"outputTokens": 328,
"latencyMs": 9405.325083000003
},
{
"questionId": "q141",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1575.9942499999888
},
{
"questionId": "q142",
"format": "json",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 15189,
"outputTokens": 456,
"latencyMs": 7723.79820900003
},
{
"questionId": "q142",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 17409,
"outputTokens": 6,
"latencyMs": 1496.878625000012
},
{
"questionId": "q142",
"format": "toon",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 8790,
"outputTokens": 328,
"latencyMs": 5231.312959000003
},
{
"questionId": "q142",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 9279,
"outputTokens": 6,
"latencyMs": 1145.5107919999864
},
{
"questionId": "q142",
"format": "csv",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 8558,
"outputTokens": 392,
"latencyMs": 4585.943417000002
},
{
"questionId": "q142",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 9125,
"outputTokens": 6,
"latencyMs": 1386.1237079999992
},
{
"questionId": "q142",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 15483,
"outputTokens": 328,
"latencyMs": 9374.248917000019
},
{
"questionId": "q142",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 15367,
"outputTokens": 6,
"latencyMs": 1332.4388340000296
},
{
"questionId": "q142",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 13173,
"outputTokens": 200,
"latencyMs": 3953.8284580000327
},
{
"questionId": "q142",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 14483,
"outputTokens": 6,
"latencyMs": 1294.3535840000259
},
{
"questionId": "q143",
"format": "json",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 15187,
"outputTokens": 584,
"latencyMs": 8515.676582999993
},
{
"questionId": "q143",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 17410,
"outputTokens": 6,
"latencyMs": 2508.0940420000115
},
{
"questionId": "q143",
"format": "toon",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 8788,
"outputTokens": 584,
"latencyMs": 6331.0320000000065
},
{
"questionId": "q143",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 9280,
"outputTokens": 6,
"latencyMs": 1249.4856250000303
},
{
"questionId": "q143",
"format": "csv",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 8556,
"outputTokens": 648,
"latencyMs": 8463.519499999995
},
{
"questionId": "q143",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 9126,
"outputTokens": 6,
"latencyMs": 1035.4223750000237
},
{
"questionId": "q143",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 15481,
"outputTokens": 520,
"latencyMs": 9625.975833999983
},
{
"questionId": "q143",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 15368,
"outputTokens": 6,
"latencyMs": 1460.7396250000456
},
{
"questionId": "q143",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 13171,
"outputTokens": 712,
"latencyMs": 7525.112709000008
},
{
"questionId": "q143",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 14484,
"outputTokens": 6,
"latencyMs": 1488.0029170000344
},
{
"questionId": "q144",
"format": "json",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "not found",
"correct": false,
"inputTokens": 15185,
"outputTokens": 1352,
"latencyMs": 8303.157542
},
{
"questionId": "q144",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1515.7900000000373
},
{
"questionId": "q144",
"format": "toon",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "0",
"correct": false,
"inputTokens": 8786,
"outputTokens": 2503,
"latencyMs": 20915.808583000035
},
{
"questionId": "q144",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1193.4237079999875
},
{
"questionId": "q144",
"format": "csv",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 8554,
"outputTokens": 4360,
"latencyMs": 34760.80329100002
},
{
"questionId": "q144",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 3022.242749999976
},
{
"questionId": "q144",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "0",
"correct": false,
"inputTokens": 15479,
"outputTokens": 2567,
"latencyMs": 15901.546999999962
},
{
"questionId": "q144",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 15363,
"outputTokens": 6,
"latencyMs": 1358.283374999999
},
{
"questionId": "q144",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 13169,
"outputTokens": 584,
"latencyMs": 10520.349042000016
},
{
"questionId": "q144",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1426.0678330000374
},
{
"questionId": "q145",
"format": "json",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 15186,
"outputTokens": 712,
"latencyMs": 7069.827042000019
},
{
"questionId": "q145",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 17406,
"outputTokens": 6,
"latencyMs": 1507.9525419999845
},
{
"questionId": "q145",
"format": "toon",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "undefined",
"correct": false,
"inputTokens": 8787,
"outputTokens": 2311,
"latencyMs": 18257.385332999984
},
{
"questionId": "q145",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 9276,
"outputTokens": 6,
"latencyMs": 1397.3040420000325
},
{
"questionId": "q145",
"format": "csv",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 8555,
"outputTokens": 3976,
"latencyMs": 29865.140291999967
},
{
"questionId": "q145",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 9122,
"outputTokens": 6,
"latencyMs": 1218.4357079999754
},
{
"questionId": "q145",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 15480,
"outputTokens": 904,
"latencyMs": 8906.708750000049
},
{
"questionId": "q145",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 15364,
"outputTokens": 6,
"latencyMs": 1917.3721249999944
},
{
"questionId": "q145",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 13170,
"outputTokens": 1160,
"latencyMs": 9665.802708000003
},
{
"questionId": "q145",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 14480,
"outputTokens": 6,
"latencyMs": 1342.7929170000134
},
{
"questionId": "q146",
"format": "json",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 15185,
"outputTokens": 648,
"latencyMs": 6259.387500000012
},
{
"questionId": "q146",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1860.1597499999916
},
{
"questionId": "q146",
"format": "toon",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 8786,
"outputTokens": 3336,
"latencyMs": 23288.63820799999
},
{
"questionId": "q146",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1180.5804169999901
},
{
"questionId": "q146",
"format": "csv",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 8554,
"outputTokens": 840,
"latencyMs": 6988.782166000048
},
{
"questionId": "q146",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1391.326041000022
},
{
"questionId": "q146",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 15479,
"outputTokens": 648,
"latencyMs": 6708.915624999965
},
{
"questionId": "q146",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 15363,
"outputTokens": 6,
"latencyMs": 1364.766833999951
},
{
"questionId": "q146",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 13169,
"outputTokens": 328,
"latencyMs": 3396.199416999996
},
{
"questionId": "q146",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1378.3461249999818
},
{
"questionId": "q147",
"format": "json",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 15185,
"outputTokens": 200,
"latencyMs": 2947.7053750000196
},
{
"questionId": "q147",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 17406,
"outputTokens": 6,
"latencyMs": 1512.1218329999829
},
{
"questionId": "q147",
"format": "toon",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 8786,
"outputTokens": 840,
"latencyMs": 7657.443458000023
},
{
"questionId": "q147",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 9276,
"outputTokens": 6,
"latencyMs": 1119.6807499999995
},
{
"questionId": "q147",
"format": "csv",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 8554,
"outputTokens": 392,
"latencyMs": 4410.906208000029
},
{
"questionId": "q147",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 9122,
"outputTokens": 6,
"latencyMs": 1227.467249999987
},
{
"questionId": "q147",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 15479,
"outputTokens": 328,
"latencyMs": 4168.014292000036
},
{
"questionId": "q147",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 15364,
"outputTokens": 6,
"latencyMs": 1878.2624590000487
},
{
"questionId": "q147",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 13169,
"outputTokens": 456,
"latencyMs": 4726.903416000016
},
{
"questionId": "q147",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 14480,
"outputTokens": 6,
"latencyMs": 1665.950124999974
},
{
"questionId": "q148",
"format": "json",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 15188,
"outputTokens": 456,
"latencyMs": 5633.756834
},
{
"questionId": "q148",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 1482.6277910000063
},
{
"questionId": "q148",
"format": "toon",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 8789,
"outputTokens": 1416,
"latencyMs": 11371.267457999988
},
{
"questionId": "q148",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1690.2400420000195
},
{
"questionId": "q148",
"format": "csv",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "Repo not found",
"correct": false,
"inputTokens": 8557,
"outputTokens": 3273,
"latencyMs": 28731.530667000043
},
{
"questionId": "q148",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 1070.5141670000157
},
{
"questionId": "q148",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 15482,
"outputTokens": 520,
"latencyMs": 7021.771125000028
},
{
"questionId": "q148",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 15365,
"outputTokens": 6,
"latencyMs": 1243.7466250000289
},
{
"questionId": "q148",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 13172,
"outputTokens": 456,
"latencyMs": 5286.169750000001
},
{
"questionId": "q148",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1450.456957999966
},
{
"questionId": "q149",
"format": "json",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 15188,
"outputTokens": 456,
"latencyMs": 5440.864250000042
},
{
"questionId": "q149",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 1369.6618330000201
},
{
"questionId": "q149",
"format": "toon",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 8789,
"outputTokens": 712,
"latencyMs": 6130.9379999999655
},
{
"questionId": "q149",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1635.81579100003
},
{
"questionId": "q149",
"format": "csv",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "N/A",
"correct": false,
"inputTokens": 8557,
"outputTokens": 1288,
"latencyMs": 20319.653374999994
},
{
"questionId": "q149",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1381.8252079999656
},
{
"questionId": "q149",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 15482,
"outputTokens": 328,
"latencyMs": 5951.751374999993
},
{
"questionId": "q149",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 15366,
"outputTokens": 6,
"latencyMs": 1367.1241670000018
},
{
"questionId": "q149",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 13172,
"outputTokens": 328,
"latencyMs": 3499.136334000039
},
{
"questionId": "q149",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1573.7027499999967
},
{
"questionId": "q150",
"format": "json",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 15187,
"outputTokens": 392,
"latencyMs": 7833.668625000049
},
{
"questionId": "q150",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1477.048582999967
},
{
"questionId": "q150",
"format": "toon",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 8788,
"outputTokens": 520,
"latencyMs": 4880.817959000007
},
{
"questionId": "q150",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1081.6979169999831
},
{
"questionId": "q150",
"format": "csv",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 8556,
"outputTokens": 1992,
"latencyMs": 14180.11841699999
},
{
"questionId": "q150",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1393.665417000011
},
{
"questionId": "q150",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 15481,
"outputTokens": 392,
"latencyMs": 4068.912416999985
},
{
"questionId": "q150",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 15363,
"outputTokens": 6,
"latencyMs": 1687.0724170000176
},
{
"questionId": "q150",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 13171,
"outputTokens": 392,
"latencyMs": 4048.8707089999807
},
{
"questionId": "q150",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1441.8594579999917
},
{
"questionId": "q151",
"format": "json",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 15190,
"outputTokens": 392,
"latencyMs": 4563.366041000001
},
{
"questionId": "q151",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 17414,
"outputTokens": 6,
"latencyMs": 1361.9952920000069
},
{
"questionId": "q151",
"format": "toon",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 8791,
"outputTokens": 904,
"latencyMs": 9523.924416000023
},
{
"questionId": "q151",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 9284,
"outputTokens": 6,
"latencyMs": 1235.863416999986
},
{
"questionId": "q151",
"format": "csv",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 8559,
"outputTokens": 584,
"latencyMs": 5264.637583000003
},
{
"questionId": "q151",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 9130,
"outputTokens": 6,
"latencyMs": 1307.1584169999696
},
{
"questionId": "q151",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 15484,
"outputTokens": 328,
"latencyMs": 8621.355207999994
},
{
"questionId": "q151",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 15372,
"outputTokens": 6,
"latencyMs": 1464.8200829999987
},
{
"questionId": "q151",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 13174,
"outputTokens": 264,
"latencyMs": 3034.7359999999753
},
{
"questionId": "q151",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 14488,
"outputTokens": 6,
"latencyMs": 1959.3285000000033
},
{
"questionId": "q152",
"format": "json",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 15187,
"outputTokens": 2055,
"latencyMs": 16430.930082999985
},
{
"questionId": "q152",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 17406,
"outputTokens": 5,
"latencyMs": 1730.124458999955
},
{
"questionId": "q152",
"format": "toon",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 8788,
"outputTokens": 839,
"latencyMs": 7275.640458000009
},
{
"questionId": "q152",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 9276,
"outputTokens": 5,
"latencyMs": 1286.8315839999705
},
{
"questionId": "q152",
"format": "csv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 8556,
"outputTokens": 2695,
"latencyMs": 24177.570000000007
},
{
"questionId": "q152",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 9122,
"outputTokens": 5,
"latencyMs": 1102.5337500000023
},
{
"questionId": "q152",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 15481,
"outputTokens": 1671,
"latencyMs": 14929.856415999995
},
{
"questionId": "q152",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 15364,
"outputTokens": 5,
"latencyMs": 1227.103541999997
},
{
"questionId": "q152",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 13171,
"outputTokens": 583,
"latencyMs": 5785.248666999978
},
{
"questionId": "q152",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 14480,
"outputTokens": 5,
"latencyMs": 1959.456125000026
},
{
"questionId": "q153",
"format": "json",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "19196630",
"correct": false,
"inputTokens": 15188,
"outputTokens": 13385,
"latencyMs": 239619.323125
},
{
"questionId": "q153",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"correct": false,
"inputTokens": 17407,
"outputTokens": 9,
"latencyMs": 1838.8340420000022
},
{
"questionId": "q153",
"format": "toon",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "15404143",
"correct": true,
"inputTokens": 8789,
"outputTokens": 12169,
"latencyMs": 109453.991416
},
{
"questionId": "q153",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"correct": false,
"inputTokens": 9277,
"outputTokens": 9,
"latencyMs": 1443.470417000004
},
{
"questionId": "q153",
"format": "csv",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "15404143",
"correct": true,
"inputTokens": 8557,
"outputTokens": 6281,
"latencyMs": 45474.442209
},
{
"questionId": "q153",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "15,847,892",
"correct": false,
"inputTokens": 9123,
"outputTokens": 9,
"latencyMs": 1361.6022089999751
},
{
"questionId": "q153",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "15404143",
"correct": true,
"inputTokens": 15482,
"outputTokens": 4489,
"latencyMs": 29654.25554099999
},
{
"questionId": "q153",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"correct": false,
"inputTokens": 15365,
"outputTokens": 9,
"latencyMs": 1796.0902500000084
},
{
"questionId": "q153",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "15404143",
"actual": "15404143",
"correct": true,
"inputTokens": 13172,
"outputTokens": 6409,
"latencyMs": 70234.84133299999
},
{
"questionId": "q153",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"correct": false,
"inputTokens": 14481,
"outputTokens": 9,
"latencyMs": 1965.7452919999487
},
{
"questionId": "q154",
"format": "json",
"model": "gpt-5-nano",
"expected": "100",
"actual": "60",
"correct": false,
"inputTokens": 15188,
"outputTokens": 7495,
"latencyMs": 72992.43658400001
},
{
"questionId": "q154",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 1772.3059999999823
},
{
"questionId": "q154",
"format": "toon",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 8789,
"outputTokens": 2759,
"latencyMs": 19214.133417000005
},
{
"questionId": "q154",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1115.5979170000064
},
{
"questionId": "q154",
"format": "csv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 8557,
"outputTokens": 2439,
"latencyMs": 27365.987334000005
},
{
"questionId": "q154",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1322.4322910000337
},
{
"questionId": "q154",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 15482,
"outputTokens": 5767,
"latencyMs": 60524.90554200002
},
{
"questionId": "q154",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 15366,
"outputTokens": 5,
"latencyMs": 1597.7364170000073
},
{
"questionId": "q154",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 13172,
"outputTokens": 4039,
"latencyMs": 28819.869999999995
},
{
"questionId": "q154",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 1798.9455409999937
},
{
"questionId": "q155",
"format": "json",
"model": "gpt-5-nano",
"expected": "100",
"actual": "86",
"correct": false,
"inputTokens": 15188,
"outputTokens": 2375,
"latencyMs": 23963.549916999997
},
{
"questionId": "q155",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"correct": false,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 1836.1375000000116
},
{
"questionId": "q155",
"format": "toon",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 8789,
"outputTokens": 3079,
"latencyMs": 26957.04420799995
},
{
"questionId": "q155",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "42",
"correct": false,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1209.7997920000344
},
{
"questionId": "q155",
"format": "csv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 8557,
"outputTokens": 2887,
"latencyMs": 27174.970375000034
},
{
"questionId": "q155",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "47",
"correct": false,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1293.6252920000115
},
{
"questionId": "q155",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "98",
"correct": false,
"inputTokens": 15482,
"outputTokens": 2567,
"latencyMs": 29565.065250000043
},
{
"questionId": "q155",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"correct": false,
"inputTokens": 15366,
"outputTokens": 5,
"latencyMs": 1230.7459160000435
},
{
"questionId": "q155",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 13172,
"outputTokens": 2695,
"latencyMs": 20706.84841700003
},
{
"questionId": "q155",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"correct": false,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 1743.1536249999772
},
{
"questionId": "q156",
"format": "json",
"model": "gpt-5-nano",
"expected": "76",
"actual": "41",
"correct": false,
"inputTokens": 15188,
"outputTokens": 8263,
"latencyMs": 60899.858959000034
},
{
"questionId": "q156",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 1350.1540420000092
},
{
"questionId": "q156",
"format": "toon",
"model": "gpt-5-nano",
"expected": "76",
"actual": "76",
"correct": true,
"inputTokens": 8789,
"outputTokens": 3847,
"latencyMs": 30491.779582999996
},
{
"questionId": "q156",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1513.2665410000482
},
{
"questionId": "q156",
"format": "csv",
"model": "gpt-5-nano",
"expected": "76",
"actual": "76",
"correct": true,
"inputTokens": 8557,
"outputTokens": 3847,
"latencyMs": 25522.397125000018
},
{
"questionId": "q156",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1150.7281660000444
},
{
"questionId": "q156",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "76",
"actual": "76",
"correct": true,
"inputTokens": 15482,
"outputTokens": 2631,
"latencyMs": 22525.465083000017
},
{
"questionId": "q156",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 15366,
"outputTokens": 5,
"latencyMs": 1438.5829169999924
},
{
"questionId": "q156",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "76",
"actual": "62",
"correct": false,
"inputTokens": 13172,
"outputTokens": 1351,
"latencyMs": 11162.623291999975
},
{
"questionId": "q156",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 1305.162249999994
},
{
"questionId": "q157",
"format": "json",
"model": "gpt-5-nano",
"expected": "100",
"actual": "129",
"correct": false,
"inputTokens": 15188,
"outputTokens": 6599,
"latencyMs": 49590.68900000001
},
{
"questionId": "q157",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "89",
"correct": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 1750.9506249999977
},
{
"questionId": "q157",
"format": "toon",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 8789,
"outputTokens": 8903,
"latencyMs": 68556.36550000001
},
{
"questionId": "q157",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "73",
"correct": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1148.3701669999864
},
{
"questionId": "q157",
"format": "csv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 8557,
"outputTokens": 3271,
"latencyMs": 36128.254709
},
{
"questionId": "q157",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "89",
"correct": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 1137.2578750000102
},
{
"questionId": "q157",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "100",
"actual": "79",
"correct": false,
"inputTokens": 15482,
"outputTokens": 3527,
"latencyMs": 35526.23958300002
},
{
"questionId": "q157",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "95",
"correct": false,
"inputTokens": 15367,
"outputTokens": 5,
"latencyMs": 1501.6561670000083
},
{
"questionId": "q157",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "100",
"actual": "99",
"correct": false,
"inputTokens": 13172,
"outputTokens": 3143,
"latencyMs": 26700.229333000025
},
{
"questionId": "q157",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "95",
"correct": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 1159.0904580000206
},
{
"questionId": "q158",
"format": "json",
"model": "gpt-5-nano",
"expected": "95",
"actual": "94",
"correct": false,
"inputTokens": 15188,
"outputTokens": 4999,
"latencyMs": 32710.407750000013
},
{
"questionId": "q158",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"correct": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 1451.6710420000018
},
{
"questionId": "q158",
"format": "toon",
"model": "gpt-5-nano",
"expected": "95",
"actual": "82",
"correct": false,
"inputTokens": 8789,
"outputTokens": 3143,
"latencyMs": 18360.73424999998
},
{
"questionId": "q158",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"correct": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1035.2159160000156
},
{
"questionId": "q158",
"format": "csv",
"model": "gpt-5-nano",
"expected": "95",
"actual": "95",
"correct": true,
"inputTokens": 8557,
"outputTokens": 4487,
"latencyMs": 28020.044915999984
},
{
"questionId": "q158",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"correct": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 1175.8671249999898
},
{
"questionId": "q158",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "95",
"actual": "77",
"correct": false,
"inputTokens": 15482,
"outputTokens": 2887,
"latencyMs": 24031.185459
},
{
"questionId": "q158",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "47",
"correct": false,
"inputTokens": 15367,
"outputTokens": 5,
"latencyMs": 1724.9393750000163
},
{
"questionId": "q158",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "95",
"actual": "81",
"correct": false,
"inputTokens": 13172,
"outputTokens": 4359,
"latencyMs": 35723.19641699997
},
{
"questionId": "q158",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "47",
"correct": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 1663.259167000011
},
{
"questionId": "q159",
"format": "json",
"model": "gpt-5-nano",
"expected": "83",
"actual": "71",
"correct": false,
"inputTokens": 15188,
"outputTokens": 2439,
"latencyMs": 18168.518166999973
},
{
"questionId": "q159",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "71",
"correct": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 1390.1757499999949
},
{
"questionId": "q159",
"format": "toon",
"model": "gpt-5-nano",
"expected": "83",
"actual": "57",
"correct": false,
"inputTokens": 8789,
"outputTokens": 4423,
"latencyMs": 41240.42016700003
},
{
"questionId": "q159",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "73",
"correct": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1066.675458999991
},
{
"questionId": "q159",
"format": "csv",
"model": "gpt-5-nano",
"expected": "83",
"actual": "83",
"correct": true,
"inputTokens": 8557,
"outputTokens": 5831,
"latencyMs": 40638.93858400005
},
{
"questionId": "q159",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "73",
"correct": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 1394.1952499999898
},
{
"questionId": "q159",
"format": "markdown-kv",
"model": "gpt-5-nano",
"expected": "83",
"actual": "83",
"correct": true,
"inputTokens": 15482,
"outputTokens": 3591,
"latencyMs": 25356.36183400004
},
{
"questionId": "q159",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "71",
"correct": false,
"inputTokens": 15367,
"outputTokens": 5,
"latencyMs": 1238.0827089999802
},
{
"questionId": "q159",
"format": "yaml",
"model": "gpt-5-nano",
"expected": "83",
"actual": "72",
"correct": false,
"inputTokens": 13172,
"outputTokens": 2567,
"latencyMs": 25124.520583999984
},
{
"questionId": "q159",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "71",
"correct": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 2058.834957999992
}
]