Files
toon/benchmarks/results/accuracy/raw-results.json
2025-10-27 11:48:33 +01:00

17492 lines
361 KiB
JSON

[
{
"questionId": "q1",
"format": "json",
"model": "gpt-4o-mini",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 6391,
"outputTokens": 3,
"latencyMs": 1313
},
{
"questionId": "q1",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1346
},
{
"questionId": "q1",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 2528,
"outputTokens": 3,
"latencyMs": 1191
},
{
"questionId": "q1",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1399
},
{
"questionId": "q1",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 2382,
"outputTokens": 3,
"latencyMs": 5010
},
{
"questionId": "q1",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1472
},
{
"questionId": "q1",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 6317,
"outputTokens": 3,
"latencyMs": 1667
},
{
"questionId": "q1",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1507
},
{
"questionId": "q1",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 5013,
"outputTokens": 3,
"latencyMs": 1325
},
{
"questionId": "q1",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "56176",
"actual": "56176",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 2280
},
{
"questionId": "q2",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6391,
"outputTokens": 2,
"latencyMs": 3167
},
{
"questionId": "q2",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1267
},
{
"questionId": "q2",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2528,
"outputTokens": 2,
"latencyMs": 1402
},
{
"questionId": "q2",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 1290
},
{
"questionId": "q2",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2382,
"outputTokens": 2,
"latencyMs": 5070
},
{
"questionId": "q2",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 1320
},
{
"questionId": "q2",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6317,
"outputTokens": 2,
"latencyMs": 1745
},
{
"questionId": "q2",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6364,
"outputTokens": 4,
"latencyMs": 1191
},
{
"questionId": "q2",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5013,
"outputTokens": 2,
"latencyMs": 2713
},
{
"questionId": "q2",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1309
},
{
"questionId": "q3",
"format": "json",
"model": "gpt-4o-mini",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 6393,
"outputTokens": 7,
"latencyMs": 1160
},
{
"questionId": "q3",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 7874,
"outputTokens": 12,
"latencyMs": 1338
},
{
"questionId": "q3",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 2530,
"outputTokens": 7,
"latencyMs": 1478
},
{
"questionId": "q3",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 2986,
"outputTokens": 12,
"latencyMs": 1563
},
{
"questionId": "q3",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 2384,
"outputTokens": 7,
"latencyMs": 1310
},
{
"questionId": "q3",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 2860,
"outputTokens": 12,
"latencyMs": 1236
},
{
"questionId": "q3",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 6319,
"outputTokens": 7,
"latencyMs": 2236
},
{
"questionId": "q3",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 6369,
"outputTokens": 12,
"latencyMs": 1253
},
{
"questionId": "q3",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 5015,
"outputTokens": 7,
"latencyMs": 1917
},
{
"questionId": "q3",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "lorenza.kunze@yahoo.com",
"actual": "lorenza.kunze@yahoo.com",
"correct": true,
"inputTokens": 5764,
"outputTokens": 12,
"latencyMs": 1332
},
{
"questionId": "q4",
"format": "json",
"model": "gpt-4o-mini",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 6391,
"outputTokens": 3,
"latencyMs": 2945
},
{
"questionId": "q4",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1773
},
{
"questionId": "q4",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 2528,
"outputTokens": 3,
"latencyMs": 1294
},
{
"questionId": "q4",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 980
},
{
"questionId": "q4",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 2382,
"outputTokens": 3,
"latencyMs": 1747
},
{
"questionId": "q4",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1197
},
{
"questionId": "q4",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 6317,
"outputTokens": 3,
"latencyMs": 1039
},
{
"questionId": "q4",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1453
},
{
"questionId": "q4",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 5013,
"outputTokens": 3,
"latencyMs": 1056
},
{
"questionId": "q4",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "117381",
"actual": "117381",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1564
},
{
"questionId": "q5",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6390,
"outputTokens": 2,
"latencyMs": 1263
},
{
"questionId": "q5",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1097
},
{
"questionId": "q5",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 2,
"latencyMs": 1248
},
{
"questionId": "q5",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1486
},
{
"questionId": "q5",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 2,
"latencyMs": 1311
},
{
"questionId": "q5",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1019
},
{
"questionId": "q5",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6316,
"outputTokens": 2,
"latencyMs": 1287
},
{
"questionId": "q5",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 1243
},
{
"questionId": "q5",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5012,
"outputTokens": 2,
"latencyMs": 1339
},
{
"questionId": "q5",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1621
},
{
"questionId": "q6",
"format": "json",
"model": "gpt-4o-mini",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 6391,
"outputTokens": 6,
"latencyMs": 1625
},
{
"questionId": "q6",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 7871,
"outputTokens": 11,
"latencyMs": 1328
},
{
"questionId": "q6",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 2528,
"outputTokens": 6,
"latencyMs": 1463
},
{
"questionId": "q6",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 2983,
"outputTokens": 11,
"latencyMs": 1149
},
{
"questionId": "q6",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 2382,
"outputTokens": 6,
"latencyMs": 1474
},
{
"questionId": "q6",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 2857,
"outputTokens": 11,
"latencyMs": 977
},
{
"questionId": "q6",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 6317,
"outputTokens": 6,
"latencyMs": 2079
},
{
"questionId": "q6",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 6366,
"outputTokens": 11,
"latencyMs": 1134
},
{
"questionId": "q6",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 5013,
"outputTokens": 6,
"latencyMs": 1124
},
{
"questionId": "q6",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "jayda60@hotmail.com",
"actual": "jayda60@hotmail.com",
"correct": true,
"inputTokens": 5761,
"outputTokens": 11,
"latencyMs": 1053
},
{
"questionId": "q7",
"format": "json",
"model": "gpt-4o-mini",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 6391,
"outputTokens": 3,
"latencyMs": 1427
},
{
"questionId": "q7",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1246
},
{
"questionId": "q7",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 2528,
"outputTokens": 3,
"latencyMs": 1171
},
{
"questionId": "q7",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1547
},
{
"questionId": "q7",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 2382,
"outputTokens": 3,
"latencyMs": 1523
},
{
"questionId": "q7",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1148
},
{
"questionId": "q7",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 6317,
"outputTokens": 3,
"latencyMs": 1360
},
{
"questionId": "q7",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1100
},
{
"questionId": "q7",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 5013,
"outputTokens": 3,
"latencyMs": 1116
},
{
"questionId": "q7",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "92971",
"actual": "92971",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1202
},
{
"questionId": "q8",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Operations",
"correct": false,
"inputTokens": 6391,
"outputTokens": 2,
"latencyMs": 974
},
{
"questionId": "q8",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7871,
"outputTokens": 4,
"latencyMs": 1357
},
{
"questionId": "q8",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2528,
"outputTokens": 2,
"latencyMs": 1107
},
{
"questionId": "q8",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2983,
"outputTokens": 4,
"latencyMs": 1126
},
{
"questionId": "q8",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2382,
"outputTokens": 2,
"latencyMs": 1124
},
{
"questionId": "q8",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2857,
"outputTokens": 4,
"latencyMs": 1208
},
{
"questionId": "q8",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Operations",
"correct": false,
"inputTokens": 6317,
"outputTokens": 2,
"latencyMs": 1463
},
{
"questionId": "q8",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6366,
"outputTokens": 4,
"latencyMs": 1175
},
{
"questionId": "q8",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5013,
"outputTokens": 2,
"latencyMs": 1952
},
{
"questionId": "q8",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5761,
"outputTokens": 4,
"latencyMs": 1271
},
{
"questionId": "q9",
"format": "json",
"model": "gpt-4o-mini",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 6393,
"outputTokens": 7,
"latencyMs": 1301
},
{
"questionId": "q9",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 7871,
"outputTokens": 11,
"latencyMs": 1371
},
{
"questionId": "q9",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 2530,
"outputTokens": 7,
"latencyMs": 1197
},
{
"questionId": "q9",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 2983,
"outputTokens": 11,
"latencyMs": 1088
},
{
"questionId": "q9",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 2384,
"outputTokens": 7,
"latencyMs": 1310
},
{
"questionId": "q9",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 2857,
"outputTokens": 11,
"latencyMs": 1300
},
{
"questionId": "q9",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 6319,
"outputTokens": 7,
"latencyMs": 1531
},
{
"questionId": "q9",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 6366,
"outputTokens": 11,
"latencyMs": 1275
},
{
"questionId": "q9",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrence.hansen@yahoo.com",
"correct": false,
"inputTokens": 5015,
"outputTokens": 7,
"latencyMs": 1245
},
{
"questionId": "q9",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "terrance.hansen@yahoo.com",
"actual": "terrance.hansen@yahoo.com",
"correct": true,
"inputTokens": 5761,
"outputTokens": 11,
"latencyMs": 1215
},
{
"questionId": "q10",
"format": "json",
"model": "gpt-4o-mini",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 6392,
"outputTokens": 3,
"latencyMs": 4959
},
{
"questionId": "q10",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1269
},
{
"questionId": "q10",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 2529,
"outputTokens": 3,
"latencyMs": 1111
},
{
"questionId": "q10",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1254
},
{
"questionId": "q10",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 2383,
"outputTokens": 3,
"latencyMs": 1616
},
{
"questionId": "q10",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1123
},
{
"questionId": "q10",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 6318,
"outputTokens": 3,
"latencyMs": 1201
},
{
"questionId": "q10",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1371
},
{
"questionId": "q10",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 5014,
"outputTokens": 3,
"latencyMs": 1503
},
{
"questionId": "q10",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "107744",
"actual": "107744",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1249
},
{
"questionId": "q11",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6391,
"outputTokens": 2,
"latencyMs": 1383
},
{
"questionId": "q11",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1081
},
{
"questionId": "q11",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2528,
"outputTokens": 2,
"latencyMs": 1677
},
{
"questionId": "q11",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 1072
},
{
"questionId": "q11",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2382,
"outputTokens": 2,
"latencyMs": 1142
},
{
"questionId": "q11",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 991
},
{
"questionId": "q11",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6317,
"outputTokens": 2,
"latencyMs": 1339
},
{
"questionId": "q11",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6364,
"outputTokens": 4,
"latencyMs": 1117
},
{
"questionId": "q11",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5013,
"outputTokens": 2,
"latencyMs": 2483
},
{
"questionId": "q11",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1187
},
{
"questionId": "q12",
"format": "json",
"model": "gpt-4o-mini",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 6390,
"outputTokens": 5,
"latencyMs": 1827
},
{
"questionId": "q12",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 7867,
"outputTokens": 9,
"latencyMs": 1121
},
{
"questionId": "q12",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 2527,
"outputTokens": 5,
"latencyMs": 1373
},
{
"questionId": "q12",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 2979,
"outputTokens": 9,
"latencyMs": 1284
},
{
"questionId": "q12",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 2381,
"outputTokens": 5,
"latencyMs": 1751
},
{
"questionId": "q12",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 2853,
"outputTokens": 9,
"latencyMs": 1140
},
{
"questionId": "q12",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 6316,
"outputTokens": 5,
"latencyMs": 1624
},
{
"questionId": "q12",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 6362,
"outputTokens": 9,
"latencyMs": 1071
},
{
"questionId": "q12",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 5012,
"outputTokens": 5,
"latencyMs": 1970
},
{
"questionId": "q12",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "allan21@gmail.com",
"actual": "allan21@gmail.com",
"correct": true,
"inputTokens": 5757,
"outputTokens": 9,
"latencyMs": 1437
},
{
"questionId": "q13",
"format": "json",
"model": "gpt-4o-mini",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 6389,
"outputTokens": 3,
"latencyMs": 1263
},
{
"questionId": "q13",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 7868,
"outputTokens": 6,
"latencyMs": 1277
},
{
"questionId": "q13",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 2526,
"outputTokens": 3,
"latencyMs": 1151
},
{
"questionId": "q13",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 2980,
"outputTokens": 6,
"latencyMs": 1260
},
{
"questionId": "q13",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 2380,
"outputTokens": 3,
"latencyMs": 1071
},
{
"questionId": "q13",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 2854,
"outputTokens": 6,
"latencyMs": 891
},
{
"questionId": "q13",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 6315,
"outputTokens": 3,
"latencyMs": 1548
},
{
"questionId": "q13",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 6363,
"outputTokens": 6,
"latencyMs": 1456
},
{
"questionId": "q13",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 5011,
"outputTokens": 3,
"latencyMs": 1268
},
{
"questionId": "q13",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "145843",
"actual": "145843",
"correct": true,
"inputTokens": 5758,
"outputTokens": 6,
"latencyMs": 1205
},
{
"questionId": "q14",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6390,
"outputTokens": 2,
"latencyMs": 1310
},
{
"questionId": "q14",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1071
},
{
"questionId": "q14",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 2,
"latencyMs": 895
},
{
"questionId": "q14",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1020
},
{
"questionId": "q14",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 2,
"latencyMs": 1168
},
{
"questionId": "q14",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 977
},
{
"questionId": "q14",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Operations",
"correct": false,
"inputTokens": 6316,
"outputTokens": 2,
"latencyMs": 1370
},
{
"questionId": "q14",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 1508
},
{
"questionId": "q14",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5012,
"outputTokens": 2,
"latencyMs": 3622
},
{
"questionId": "q14",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1249
},
{
"questionId": "q15",
"format": "json",
"model": "gpt-4o-mini",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 6391,
"outputTokens": 7,
"latencyMs": 3269
},
{
"questionId": "q15",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 9,
"latencyMs": 1538
},
{
"questionId": "q15",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 2528,
"outputTokens": 7,
"latencyMs": 1413
},
{
"questionId": "q15",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 9,
"latencyMs": 1027
},
{
"questionId": "q15",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 2382,
"outputTokens": 7,
"latencyMs": 1257
},
{
"questionId": "q15",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 9,
"latencyMs": 1169
},
{
"questionId": "q15",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 6317,
"outputTokens": 7,
"latencyMs": 1464
},
{
"questionId": "q15",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 9,
"latencyMs": 1799
},
{
"questionId": "q15",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 5013,
"outputTokens": 7,
"latencyMs": 1616
},
{
"questionId": "q15",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "alexandria61@gmail.com",
"actual": "alexandria61@gmail.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 9,
"latencyMs": 1349
},
{
"questionId": "q16",
"format": "json",
"model": "gpt-4o-mini",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 6390,
"outputTokens": 3,
"latencyMs": 1298
},
{
"questionId": "q16",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 7870,
"outputTokens": 6,
"latencyMs": 1115
},
{
"questionId": "q16",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 2527,
"outputTokens": 3,
"latencyMs": 1180
},
{
"questionId": "q16",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 2982,
"outputTokens": 6,
"latencyMs": 1110
},
{
"questionId": "q16",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 2381,
"outputTokens": 3,
"latencyMs": 1235
},
{
"questionId": "q16",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 2856,
"outputTokens": 6,
"latencyMs": 1228
},
{
"questionId": "q16",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 6316,
"outputTokens": 3,
"latencyMs": 1832
},
{
"questionId": "q16",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 6365,
"outputTokens": 6,
"latencyMs": 1401
},
{
"questionId": "q16",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 5012,
"outputTokens": 3,
"latencyMs": 933
},
{
"questionId": "q16",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89436",
"actual": "89436",
"correct": true,
"inputTokens": 5760,
"outputTokens": 6,
"latencyMs": 1570
},
{
"questionId": "q17",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6393,
"outputTokens": 2,
"latencyMs": 1221
},
{
"questionId": "q17",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7872,
"outputTokens": 4,
"latencyMs": 1293
},
{
"questionId": "q17",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2530,
"outputTokens": 2,
"latencyMs": 1147
},
{
"questionId": "q17",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2984,
"outputTokens": 4,
"latencyMs": 923
},
{
"questionId": "q17",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2384,
"outputTokens": 2,
"latencyMs": 1180
},
{
"questionId": "q17",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2858,
"outputTokens": 4,
"latencyMs": 1025
},
{
"questionId": "q17",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6319,
"outputTokens": 2,
"latencyMs": 1748
},
{
"questionId": "q17",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6367,
"outputTokens": 4,
"latencyMs": 1188
},
{
"questionId": "q17",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5015,
"outputTokens": 2,
"latencyMs": 1452
},
{
"questionId": "q17",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5762,
"outputTokens": 4,
"latencyMs": 1329
},
{
"questionId": "q18",
"format": "json",
"model": "gpt-4o-mini",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 6391,
"outputTokens": 6,
"latencyMs": 768
},
{
"questionId": "q18",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 7871,
"outputTokens": 10,
"latencyMs": 1150
},
{
"questionId": "q18",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 2528,
"outputTokens": 6,
"latencyMs": 1501
},
{
"questionId": "q18",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 2983,
"outputTokens": 10,
"latencyMs": 1201
},
{
"questionId": "q18",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 2382,
"outputTokens": 6,
"latencyMs": 1604
},
{
"questionId": "q18",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 2857,
"outputTokens": 10,
"latencyMs": 1060
},
{
"questionId": "q18",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 6317,
"outputTokens": 6,
"latencyMs": 1350
},
{
"questionId": "q18",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 6366,
"outputTokens": 10,
"latencyMs": 1154
},
{
"questionId": "q18",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 5013,
"outputTokens": 6,
"latencyMs": 1199
},
{
"questionId": "q18",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "kelvin54@yahoo.com",
"actual": "kelvin54@yahoo.com",
"correct": true,
"inputTokens": 5761,
"outputTokens": 10,
"latencyMs": 1216
},
{
"questionId": "q19",
"format": "json",
"model": "gpt-4o-mini",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 6391,
"outputTokens": 3,
"latencyMs": 1412
},
{
"questionId": "q19",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 7872,
"outputTokens": 6,
"latencyMs": 1908
},
{
"questionId": "q19",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 2528,
"outputTokens": 3,
"latencyMs": 1366
},
{
"questionId": "q19",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 2984,
"outputTokens": 6,
"latencyMs": 1054
},
{
"questionId": "q19",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 2382,
"outputTokens": 3,
"latencyMs": 1121
},
{
"questionId": "q19",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 2858,
"outputTokens": 6,
"latencyMs": 1262
},
{
"questionId": "q19",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 6317,
"outputTokens": 3,
"latencyMs": 4632
},
{
"questionId": "q19",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 6367,
"outputTokens": 6,
"latencyMs": 1118
},
{
"questionId": "q19",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 5013,
"outputTokens": 3,
"latencyMs": 928
},
{
"questionId": "q19",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "143365",
"actual": "143365",
"correct": true,
"inputTokens": 5762,
"outputTokens": 6,
"latencyMs": 1191
},
{
"questionId": "q20",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6390,
"outputTokens": 2,
"latencyMs": 1053
},
{
"questionId": "q20",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1096
},
{
"questionId": "q20",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 2,
"latencyMs": 1784
},
{
"questionId": "q20",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1093
},
{
"questionId": "q20",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 2,
"latencyMs": 1335
},
{
"questionId": "q20",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1546
},
{
"questionId": "q20",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6316,
"outputTokens": 2,
"latencyMs": 1293
},
{
"questionId": "q20",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 1230
},
{
"questionId": "q20",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5012,
"outputTokens": 2,
"latencyMs": 1467
},
{
"questionId": "q20",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1370
},
{
"questionId": "q21",
"format": "json",
"model": "gpt-4o-mini",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 6394,
"outputTokens": 6,
"latencyMs": 5026
},
{
"questionId": "q21",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 7876,
"outputTokens": 9,
"latencyMs": 1786
},
{
"questionId": "q21",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 2531,
"outputTokens": 6,
"latencyMs": 826
},
{
"questionId": "q21",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 2988,
"outputTokens": 9,
"latencyMs": 909
},
{
"questionId": "q21",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 2385,
"outputTokens": 6,
"latencyMs": 1120
},
{
"questionId": "q21",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 2862,
"outputTokens": 9,
"latencyMs": 996
},
{
"questionId": "q21",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 6320,
"outputTokens": 6,
"latencyMs": 1639
},
{
"questionId": "q21",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 6371,
"outputTokens": 9,
"latencyMs": 1299
},
{
"questionId": "q21",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 5016,
"outputTokens": 6,
"latencyMs": 1151
},
{
"questionId": "q21",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "dean19@gmail.com",
"actual": "dean19@gmail.com",
"correct": true,
"inputTokens": 5766,
"outputTokens": 9,
"latencyMs": 1246
},
{
"questionId": "q22",
"format": "json",
"model": "gpt-4o-mini",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 6392,
"outputTokens": 3,
"latencyMs": 1838
},
{
"questionId": "q22",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1191
},
{
"questionId": "q22",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 2529,
"outputTokens": 3,
"latencyMs": 980
},
{
"questionId": "q22",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1299
},
{
"questionId": "q22",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 2383,
"outputTokens": 3,
"latencyMs": 1027
},
{
"questionId": "q22",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1433
},
{
"questionId": "q22",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 6318,
"outputTokens": 3,
"latencyMs": 2256
},
{
"questionId": "q22",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 6366,
"outputTokens": 6,
"latencyMs": 1091
},
{
"questionId": "q22",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 5014,
"outputTokens": 3,
"latencyMs": 1288
},
{
"questionId": "q22",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "111314",
"actual": "111314",
"correct": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1306
},
{
"questionId": "q23",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6389,
"outputTokens": 2,
"latencyMs": 1951
},
{
"questionId": "q23",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1440
},
{
"questionId": "q23",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2526,
"outputTokens": 2,
"latencyMs": 978
},
{
"questionId": "q23",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 1385
},
{
"questionId": "q23",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2380,
"outputTokens": 2,
"latencyMs": 2311
},
{
"questionId": "q23",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1066
},
{
"questionId": "q23",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6315,
"outputTokens": 2,
"latencyMs": 1914
},
{
"questionId": "q23",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 1596
},
{
"questionId": "q23",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5011,
"outputTokens": 2,
"latencyMs": 1820
},
{
"questionId": "q23",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1067
},
{
"questionId": "q24",
"format": "json",
"model": "gpt-4o-mini",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 6391,
"outputTokens": 6,
"latencyMs": 2594
},
{
"questionId": "q24",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 10,
"latencyMs": 1139
},
{
"questionId": "q24",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 2528,
"outputTokens": 6,
"latencyMs": 1225
},
{
"questionId": "q24",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 10,
"latencyMs": 1082
},
{
"questionId": "q24",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 2382,
"outputTokens": 6,
"latencyMs": 4857
},
{
"questionId": "q24",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 10,
"latencyMs": 1082
},
{
"questionId": "q24",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 6317,
"outputTokens": 6,
"latencyMs": 1272
},
{
"questionId": "q24",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 10,
"latencyMs": 1201
},
{
"questionId": "q24",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 5013,
"outputTokens": 6,
"latencyMs": 1197
},
{
"questionId": "q24",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "laurel54@yahoo.com",
"actual": "laurel54@yahoo.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 10,
"latencyMs": 1198
},
{
"questionId": "q25",
"format": "json",
"model": "gpt-4o-mini",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 6392,
"outputTokens": 3,
"latencyMs": 1085
},
{
"questionId": "q25",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 7873,
"outputTokens": 6,
"latencyMs": 1102
},
{
"questionId": "q25",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 2529,
"outputTokens": 3,
"latencyMs": 1350
},
{
"questionId": "q25",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 2985,
"outputTokens": 6,
"latencyMs": 1300
},
{
"questionId": "q25",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 2383,
"outputTokens": 3,
"latencyMs": 998
},
{
"questionId": "q25",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 2859,
"outputTokens": 6,
"latencyMs": 972
},
{
"questionId": "q25",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 6318,
"outputTokens": 3,
"latencyMs": 1331
},
{
"questionId": "q25",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 6368,
"outputTokens": 6,
"latencyMs": 1027
},
{
"questionId": "q25",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 5014,
"outputTokens": 3,
"latencyMs": 1170
},
{
"questionId": "q25",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89553",
"actual": "89553",
"correct": true,
"inputTokens": 5763,
"outputTokens": 6,
"latencyMs": 1074
},
{
"questionId": "q26",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6389,
"outputTokens": 2,
"latencyMs": 1862
},
{
"questionId": "q26",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7866,
"outputTokens": 4,
"latencyMs": 1435
},
{
"questionId": "q26",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2526,
"outputTokens": 2,
"latencyMs": 989
},
{
"questionId": "q26",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2978,
"outputTokens": 4,
"latencyMs": 1035
},
{
"questionId": "q26",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2380,
"outputTokens": 2,
"latencyMs": 2157
},
{
"questionId": "q26",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2852,
"outputTokens": 4,
"latencyMs": 1094
},
{
"questionId": "q26",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6315,
"outputTokens": 2,
"latencyMs": 1912
},
{
"questionId": "q26",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6361,
"outputTokens": 4,
"latencyMs": 1364
},
{
"questionId": "q26",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5011,
"outputTokens": 2,
"latencyMs": 1435
},
{
"questionId": "q26",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5756,
"outputTokens": 4,
"latencyMs": 1082
},
{
"questionId": "q27",
"format": "json",
"model": "gpt-4o-mini",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 6392,
"outputTokens": 9,
"latencyMs": 1274
},
{
"questionId": "q27",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 7871,
"outputTokens": 14,
"latencyMs": 1130
},
{
"questionId": "q27",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 2529,
"outputTokens": 9,
"latencyMs": 1795
},
{
"questionId": "q27",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 2983,
"outputTokens": 14,
"latencyMs": 1309
},
{
"questionId": "q27",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 2383,
"outputTokens": 9,
"latencyMs": 1406
},
{
"questionId": "q27",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 2857,
"outputTokens": 14,
"latencyMs": 1398
},
{
"questionId": "q27",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 6318,
"outputTokens": 9,
"latencyMs": 1114
},
{
"questionId": "q27",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 6366,
"outputTokens": 14,
"latencyMs": 1251
},
{
"questionId": "q27",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 5014,
"outputTokens": 9,
"latencyMs": 1941
},
{
"questionId": "q27",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "jayme.kertzmann77@gmail.com",
"actual": "jayme.kertzmann77@gmail.com",
"correct": true,
"inputTokens": 5761,
"outputTokens": 14,
"latencyMs": 1218
},
{
"questionId": "q28",
"format": "json",
"model": "gpt-4o-mini",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 6391,
"outputTokens": 3,
"latencyMs": 1395
},
{
"questionId": "q28",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1342
},
{
"questionId": "q28",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 2528,
"outputTokens": 3,
"latencyMs": 919
},
{
"questionId": "q28",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1187
},
{
"questionId": "q28",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 2382,
"outputTokens": 3,
"latencyMs": 1131
},
{
"questionId": "q28",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1191
},
{
"questionId": "q28",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 6317,
"outputTokens": 3,
"latencyMs": 1435
},
{
"questionId": "q28",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 6366,
"outputTokens": 6,
"latencyMs": 1095
},
{
"questionId": "q28",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 5013,
"outputTokens": 3,
"latencyMs": 4588
},
{
"questionId": "q28",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "104053",
"actual": "104053",
"correct": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1291
},
{
"questionId": "q29",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6392,
"outputTokens": 2,
"latencyMs": 1688
},
{
"questionId": "q29",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7872,
"outputTokens": 4,
"latencyMs": 1301
},
{
"questionId": "q29",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2529,
"outputTokens": 2,
"latencyMs": 1914
},
{
"questionId": "q29",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2984,
"outputTokens": 4,
"latencyMs": 1447
},
{
"questionId": "q29",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2383,
"outputTokens": 2,
"latencyMs": 1725
},
{
"questionId": "q29",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2858,
"outputTokens": 4,
"latencyMs": 923
},
{
"questionId": "q29",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6318,
"outputTokens": 2,
"latencyMs": 879
},
{
"questionId": "q29",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6367,
"outputTokens": 4,
"latencyMs": 1322
},
{
"questionId": "q29",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5014,
"outputTokens": 2,
"latencyMs": 1394
},
{
"questionId": "q29",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5762,
"outputTokens": 4,
"latencyMs": 1008
},
{
"questionId": "q30",
"format": "json",
"model": "gpt-4o-mini",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 6391,
"outputTokens": 7,
"latencyMs": 894
},
{
"questionId": "q30",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 12,
"latencyMs": 1220
},
{
"questionId": "q30",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 2528,
"outputTokens": 7,
"latencyMs": 2225
},
{
"questionId": "q30",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 12,
"latencyMs": 1282
},
{
"questionId": "q30",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 2382,
"outputTokens": 7,
"latencyMs": 1414
},
{
"questionId": "q30",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 12,
"latencyMs": 1686
},
{
"questionId": "q30",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 6317,
"outputTokens": 7,
"latencyMs": 1113
},
{
"questionId": "q30",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 12,
"latencyMs": 1089
},
{
"questionId": "q30",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 5013,
"outputTokens": 7,
"latencyMs": 949
},
{
"questionId": "q30",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "carley.bauch@yahoo.com",
"actual": "carley.bauch@yahoo.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 12,
"latencyMs": 1273
},
{
"questionId": "q31",
"format": "json",
"model": "gpt-4o-mini",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 6394,
"outputTokens": 3,
"latencyMs": 4741
},
{
"questionId": "q31",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 7874,
"outputTokens": 6,
"latencyMs": 1132
},
{
"questionId": "q31",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 2531,
"outputTokens": 3,
"latencyMs": 1184
},
{
"questionId": "q31",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 2986,
"outputTokens": 6,
"latencyMs": 1137
},
{
"questionId": "q31",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 2385,
"outputTokens": 3,
"latencyMs": 963
},
{
"questionId": "q31",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 2860,
"outputTokens": 6,
"latencyMs": 1096
},
{
"questionId": "q31",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 6320,
"outputTokens": 3,
"latencyMs": 1399
},
{
"questionId": "q31",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 6369,
"outputTokens": 6,
"latencyMs": 1594
},
{
"questionId": "q31",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 5016,
"outputTokens": 3,
"latencyMs": 1900
},
{
"questionId": "q31",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "142029",
"actual": "142029",
"correct": true,
"inputTokens": 5764,
"outputTokens": 6,
"latencyMs": 1274
},
{
"questionId": "q32",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Sales",
"correct": false,
"inputTokens": 6390,
"outputTokens": 2,
"latencyMs": 5224
},
{
"questionId": "q32",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7869,
"outputTokens": 4,
"latencyMs": 1038
},
{
"questionId": "q32",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 2,
"latencyMs": 1902
},
{
"questionId": "q32",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2981,
"outputTokens": 4,
"latencyMs": 1010
},
{
"questionId": "q32",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 2,
"latencyMs": 3263
},
{
"questionId": "q32",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2855,
"outputTokens": 4,
"latencyMs": 871
},
{
"questionId": "q32",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Sales",
"correct": false,
"inputTokens": 6316,
"outputTokens": 2,
"latencyMs": 1278
},
{
"questionId": "q32",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6364,
"outputTokens": 4,
"latencyMs": 1048
},
{
"questionId": "q32",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Sales",
"correct": false,
"inputTokens": 5012,
"outputTokens": 2,
"latencyMs": 1271
},
{
"questionId": "q32",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5759,
"outputTokens": 4,
"latencyMs": 1075
},
{
"questionId": "q33",
"format": "json",
"model": "gpt-4o-mini",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 6394,
"outputTokens": 7,
"latencyMs": 1139
},
{
"questionId": "q33",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 7872,
"outputTokens": 14,
"latencyMs": 1319
},
{
"questionId": "q33",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 2531,
"outputTokens": 7,
"latencyMs": 1856
},
{
"questionId": "q33",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 2984,
"outputTokens": 14,
"latencyMs": 1393
},
{
"questionId": "q33",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 2385,
"outputTokens": 7,
"latencyMs": 1766
},
{
"questionId": "q33",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 2858,
"outputTokens": 14,
"latencyMs": 1609
},
{
"questionId": "q33",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 6320,
"outputTokens": 7,
"latencyMs": 1329
},
{
"questionId": "q33",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 6367,
"outputTokens": 14,
"latencyMs": 1178
},
{
"questionId": "q33",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 5016,
"outputTokens": 7,
"latencyMs": 1890
},
{
"questionId": "q33",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cheyenne_skiles@hotmail.com",
"actual": "cheyenne_skiles@hotmail.com",
"correct": true,
"inputTokens": 5762,
"outputTokens": 14,
"latencyMs": 1326
},
{
"questionId": "q34",
"format": "json",
"model": "gpt-4o-mini",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 6392,
"outputTokens": 3,
"latencyMs": 1898
},
{
"questionId": "q34",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1074
},
{
"questionId": "q34",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 2529,
"outputTokens": 3,
"latencyMs": 1382
},
{
"questionId": "q34",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1060
},
{
"questionId": "q34",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 2383,
"outputTokens": 3,
"latencyMs": 1286
},
{
"questionId": "q34",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1591
},
{
"questionId": "q34",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 6318,
"outputTokens": 3,
"latencyMs": 2158
},
{
"questionId": "q34",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 6366,
"outputTokens": 6,
"latencyMs": 1532
},
{
"questionId": "q34",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 5014,
"outputTokens": 3,
"latencyMs": 1381
},
{
"questionId": "q34",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "84650",
"actual": "84650",
"correct": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 2262
},
{
"questionId": "q35",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6391,
"outputTokens": 2,
"latencyMs": 2664
},
{
"questionId": "q35",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7871,
"outputTokens": 4,
"latencyMs": 1260
},
{
"questionId": "q35",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2528,
"outputTokens": 2,
"latencyMs": 1563
},
{
"questionId": "q35",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2983,
"outputTokens": 4,
"latencyMs": 1415
},
{
"questionId": "q35",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2382,
"outputTokens": 2,
"latencyMs": 1038
},
{
"questionId": "q35",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2857,
"outputTokens": 4,
"latencyMs": 1021
},
{
"questionId": "q35",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6317,
"outputTokens": 2,
"latencyMs": 4276
},
{
"questionId": "q35",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6366,
"outputTokens": 4,
"latencyMs": 1301
},
{
"questionId": "q35",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5013,
"outputTokens": 2,
"latencyMs": 1399
},
{
"questionId": "q35",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5761,
"outputTokens": 4,
"latencyMs": 1197
},
{
"questionId": "q36",
"format": "json",
"model": "gpt-4o-mini",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 6390,
"outputTokens": 9,
"latencyMs": 1390
},
{
"questionId": "q36",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 14,
"latencyMs": 1482
},
{
"questionId": "q36",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 2527,
"outputTokens": 9,
"latencyMs": 1754
},
{
"questionId": "q36",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 14,
"latencyMs": 1100
},
{
"questionId": "q36",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 2381,
"outputTokens": 9,
"latencyMs": 1421
},
{
"questionId": "q36",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 14,
"latencyMs": 2173
},
{
"questionId": "q36",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 6316,
"outputTokens": 9,
"latencyMs": 2911
},
{
"questionId": "q36",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 14,
"latencyMs": 1235
},
{
"questionId": "q36",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 5012,
"outputTokens": 9,
"latencyMs": 1303
},
{
"questionId": "q36",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "macey.gottlieb5@yahoo.com",
"actual": "macey.gottlieb5@yahoo.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 14,
"latencyMs": 1148
},
{
"questionId": "q37",
"format": "json",
"model": "gpt-4o-mini",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 6390,
"outputTokens": 3,
"latencyMs": 1430
},
{
"questionId": "q37",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 7868,
"outputTokens": 6,
"latencyMs": 1089
},
{
"questionId": "q37",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 2527,
"outputTokens": 3,
"latencyMs": 1059
},
{
"questionId": "q37",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 2980,
"outputTokens": 6,
"latencyMs": 1057
},
{
"questionId": "q37",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 2381,
"outputTokens": 3,
"latencyMs": 1716
},
{
"questionId": "q37",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 2854,
"outputTokens": 6,
"latencyMs": 904
},
{
"questionId": "q37",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 6316,
"outputTokens": 3,
"latencyMs": 2950
},
{
"questionId": "q37",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 6363,
"outputTokens": 6,
"latencyMs": 1189
},
{
"questionId": "q37",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 5012,
"outputTokens": 3,
"latencyMs": 1050
},
{
"questionId": "q37",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "89773",
"actual": "89773",
"correct": true,
"inputTokens": 5758,
"outputTokens": 6,
"latencyMs": 1329
},
{
"questionId": "q38",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6390,
"outputTokens": 2,
"latencyMs": 3410
},
{
"questionId": "q38",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 7868,
"outputTokens": 4,
"latencyMs": 1891
},
{
"questionId": "q38",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2527,
"outputTokens": 2,
"latencyMs": 1010
},
{
"questionId": "q38",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2980,
"outputTokens": 4,
"latencyMs": 988
},
{
"questionId": "q38",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2381,
"outputTokens": 2,
"latencyMs": 1364
},
{
"questionId": "q38",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 2854,
"outputTokens": 4,
"latencyMs": 1395
},
{
"questionId": "q38",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6316,
"outputTokens": 2,
"latencyMs": 2293
},
{
"questionId": "q38",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 6363,
"outputTokens": 4,
"latencyMs": 1137
},
{
"questionId": "q38",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5012,
"outputTokens": 2,
"latencyMs": 1451
},
{
"questionId": "q38",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Marketing",
"actual": "Marketing",
"correct": true,
"inputTokens": 5758,
"outputTokens": 4,
"latencyMs": 1100
},
{
"questionId": "q39",
"format": "json",
"model": "gpt-4o-mini",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 6390,
"outputTokens": 10,
"latencyMs": 1674
},
{
"questionId": "q39",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 7869,
"outputTokens": 13,
"latencyMs": 1403
},
{
"questionId": "q39",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 2527,
"outputTokens": 10,
"latencyMs": 1413
},
{
"questionId": "q39",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 2981,
"outputTokens": 13,
"latencyMs": 1200
},
{
"questionId": "q39",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 2381,
"outputTokens": 10,
"latencyMs": 1730
},
{
"questionId": "q39",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 2855,
"outputTokens": 13,
"latencyMs": 1226
},
{
"questionId": "q39",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 6316,
"outputTokens": 10,
"latencyMs": 1251
},
{
"questionId": "q39",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 6364,
"outputTokens": 13,
"latencyMs": 1337
},
{
"questionId": "q39",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 5012,
"outputTokens": 10,
"latencyMs": 2368
},
{
"questionId": "q39",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "georgianna_renner@yahoo.com",
"actual": "georgianna_renner@yahoo.com",
"correct": true,
"inputTokens": 5759,
"outputTokens": 13,
"latencyMs": 1251
},
{
"questionId": "q40",
"format": "json",
"model": "gpt-4o-mini",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 6391,
"outputTokens": 3,
"latencyMs": 3815
},
{
"questionId": "q40",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 7871,
"outputTokens": 6,
"latencyMs": 1169
},
{
"questionId": "q40",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 2528,
"outputTokens": 3,
"latencyMs": 1070
},
{
"questionId": "q40",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 2983,
"outputTokens": 6,
"latencyMs": 1162
},
{
"questionId": "q40",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 2382,
"outputTokens": 3,
"latencyMs": 1115
},
{
"questionId": "q40",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "144426",
"correct": false,
"inputTokens": 2857,
"outputTokens": 6,
"latencyMs": 1365
},
{
"questionId": "q40",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 6317,
"outputTokens": 3,
"latencyMs": 2004
},
{
"questionId": "q40",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 6366,
"outputTokens": 6,
"latencyMs": 1113
},
{
"questionId": "q40",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 5013,
"outputTokens": 3,
"latencyMs": 3055
},
{
"questionId": "q40",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "49741",
"actual": "49741",
"correct": true,
"inputTokens": 5761,
"outputTokens": 6,
"latencyMs": 1392
},
{
"questionId": "q41",
"format": "json",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 6388,
"outputTokens": 2,
"latencyMs": 3877
},
{
"questionId": "q41",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1128
},
{
"questionId": "q41",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 2525,
"outputTokens": 2,
"latencyMs": 966
},
{
"questionId": "q41",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1070
},
{
"questionId": "q41",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 2379,
"outputTokens": 2,
"latencyMs": 2411
},
{
"questionId": "q41",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1286
},
{
"questionId": "q41",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 6314,
"outputTokens": 2,
"latencyMs": 2082
},
{
"questionId": "q41",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1107
},
{
"questionId": "q41",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 5010,
"outputTokens": 2,
"latencyMs": 1216
},
{
"questionId": "q41",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1052
},
{
"questionId": "q42",
"format": "json",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 6388,
"outputTokens": 2,
"latencyMs": 1572
},
{
"questionId": "q42",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1084
},
{
"questionId": "q42",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 2525,
"outputTokens": 2,
"latencyMs": 1377
},
{
"questionId": "q42",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "14",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1197
},
{
"questionId": "q42",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 2379,
"outputTokens": 2,
"latencyMs": 2705
},
{
"questionId": "q42",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1020
},
{
"questionId": "q42",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 6314,
"outputTokens": 2,
"latencyMs": 5345
},
{
"questionId": "q42",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "14",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1207
},
{
"questionId": "q42",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 5010,
"outputTokens": 2,
"latencyMs": 921
},
{
"questionId": "q42",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1289
},
{
"questionId": "q43",
"format": "json",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 6388,
"outputTokens": 2,
"latencyMs": 2423
},
{
"questionId": "q43",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1273
},
{
"questionId": "q43",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 2525,
"outputTokens": 2,
"latencyMs": 975
},
{
"questionId": "q43",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1301
},
{
"questionId": "q43",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 2379,
"outputTokens": 2,
"latencyMs": 1423
},
{
"questionId": "q43",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 927
},
{
"questionId": "q43",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 6314,
"outputTokens": 2,
"latencyMs": 1258
},
{
"questionId": "q43",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1250
},
{
"questionId": "q43",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 5010,
"outputTokens": 2,
"latencyMs": 872
},
{
"questionId": "q43",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1385
},
{
"questionId": "q44",
"format": "json",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 6388,
"outputTokens": 2,
"latencyMs": 1201
},
{
"questionId": "q44",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1149
},
{
"questionId": "q44",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 2525,
"outputTokens": 2,
"latencyMs": 1498
},
{
"questionId": "q44",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1149
},
{
"questionId": "q44",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 2379,
"outputTokens": 2,
"latencyMs": 1098
},
{
"questionId": "q44",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1121
},
{
"questionId": "q44",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 6314,
"outputTokens": 2,
"latencyMs": 2522
},
{
"questionId": "q44",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "10",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1532
},
{
"questionId": "q44",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "17",
"actual": "20",
"correct": false,
"inputTokens": 5010,
"outputTokens": 2,
"latencyMs": 4914
},
{
"questionId": "q44",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "17",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1324
},
{
"questionId": "q45",
"format": "json",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 6388,
"outputTokens": 2,
"latencyMs": 1446
},
{
"questionId": "q45",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 1105
},
{
"questionId": "q45",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 2525,
"outputTokens": 2,
"latencyMs": 1297
},
{
"questionId": "q45",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1251
},
{
"questionId": "q45",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 2379,
"outputTokens": 2,
"latencyMs": 1561
},
{
"questionId": "q45",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1292
},
{
"questionId": "q45",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 6314,
"outputTokens": 2,
"latencyMs": 1127
},
{
"questionId": "q45",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1207
},
{
"questionId": "q45",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 5010,
"outputTokens": 2,
"latencyMs": 1582
},
{
"questionId": "q45",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1278
},
{
"questionId": "q46",
"format": "json",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 6388,
"outputTokens": 2,
"latencyMs": 1278
},
{
"questionId": "q46",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "10",
"correct": false,
"inputTokens": 7865,
"outputTokens": 5,
"latencyMs": 3084
},
{
"questionId": "q46",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 2525,
"outputTokens": 2,
"latencyMs": 1289
},
{
"questionId": "q46",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 2977,
"outputTokens": 5,
"latencyMs": 1591
},
{
"questionId": "q46",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 2379,
"outputTokens": 2,
"latencyMs": 3038
},
{
"questionId": "q46",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "15",
"correct": false,
"inputTokens": 2851,
"outputTokens": 5,
"latencyMs": 1447
},
{
"questionId": "q46",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 6314,
"outputTokens": 2,
"latencyMs": 1224
},
{
"questionId": "q46",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "10",
"correct": false,
"inputTokens": 6360,
"outputTokens": 5,
"latencyMs": 1250
},
{
"questionId": "q46",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "16",
"actual": "20",
"correct": false,
"inputTokens": 5010,
"outputTokens": 2,
"latencyMs": 1364
},
{
"questionId": "q46",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "16",
"actual": "12",
"correct": false,
"inputTokens": 5755,
"outputTokens": 5,
"latencyMs": 1560
},
{
"questionId": "q47",
"format": "json",
"model": "gpt-4o-mini",
"expected": "91",
"actual": "66",
"correct": false,
"inputTokens": 6393,
"outputTokens": 2,
"latencyMs": 989
},
{
"questionId": "q47",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"correct": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1358
},
{
"questionId": "q47",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "91",
"actual": "66",
"correct": false,
"inputTokens": 2530,
"outputTokens": 2,
"latencyMs": 1406
},
{
"questionId": "q47",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "85",
"correct": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1123
},
{
"questionId": "q47",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "91",
"actual": "66",
"correct": false,
"inputTokens": 2384,
"outputTokens": 2,
"latencyMs": 4883
},
{
"questionId": "q47",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "85",
"correct": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 1402
},
{
"questionId": "q47",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "91",
"actual": "66",
"correct": false,
"inputTokens": 6319,
"outputTokens": 2,
"latencyMs": 1915
},
{
"questionId": "q47",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"correct": false,
"inputTokens": 6365,
"outputTokens": 5,
"latencyMs": 1263
},
{
"questionId": "q47",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "91",
"actual": "66",
"correct": false,
"inputTokens": 5015,
"outputTokens": 2,
"latencyMs": 1448
},
{
"questionId": "q47",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "91",
"actual": "89",
"correct": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1243
},
{
"questionId": "q48",
"format": "json",
"model": "gpt-4o-mini",
"expected": "67",
"actual": "54",
"correct": false,
"inputTokens": 6393,
"outputTokens": 2,
"latencyMs": 1456
},
{
"questionId": "q48",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "57",
"correct": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1186
},
{
"questionId": "q48",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "67",
"actual": "54",
"correct": false,
"inputTokens": 2530,
"outputTokens": 2,
"latencyMs": 1076
},
{
"questionId": "q48",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "47",
"correct": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1168
},
{
"questionId": "q48",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "67",
"actual": "56",
"correct": false,
"inputTokens": 2384,
"outputTokens": 2,
"latencyMs": 3105
},
{
"questionId": "q48",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "47",
"correct": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 1375
},
{
"questionId": "q48",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "67",
"actual": "66",
"correct": false,
"inputTokens": 6319,
"outputTokens": 2,
"latencyMs": 1618
},
{
"questionId": "q48",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "47",
"correct": false,
"inputTokens": 6365,
"outputTokens": 5,
"latencyMs": 1454
},
{
"questionId": "q48",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "67",
"actual": "54",
"correct": false,
"inputTokens": 5015,
"outputTokens": 2,
"latencyMs": 1244
},
{
"questionId": "q48",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "67",
"actual": "57",
"correct": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1113
},
{
"questionId": "q49",
"format": "json",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "30",
"correct": false,
"inputTokens": 6393,
"outputTokens": 2,
"latencyMs": 1267
},
{
"questionId": "q49",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1227
},
{
"questionId": "q49",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "30",
"correct": false,
"inputTokens": 2530,
"outputTokens": 2,
"latencyMs": 1246
},
{
"questionId": "q49",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "27",
"correct": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1127
},
{
"questionId": "q49",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "34",
"correct": false,
"inputTokens": 2384,
"outputTokens": 2,
"latencyMs": 1260
},
{
"questionId": "q49",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 1293
},
{
"questionId": "q49",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "24",
"correct": false,
"inputTokens": 6319,
"outputTokens": 2,
"latencyMs": 1246
},
{
"questionId": "q49",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "27",
"correct": false,
"inputTokens": 6365,
"outputTokens": 5,
"latencyMs": 1598
},
{
"questionId": "q49",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "24",
"correct": false,
"inputTokens": 5015,
"outputTokens": 2,
"latencyMs": 1471
},
{
"questionId": "q49",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1311
},
{
"questionId": "q50",
"format": "json",
"model": "gpt-4o-mini",
"expected": "26",
"actual": "22",
"correct": false,
"inputTokens": 6393,
"outputTokens": 2,
"latencyMs": 3950
},
{
"questionId": "q50",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "20",
"correct": false,
"inputTokens": 7870,
"outputTokens": 5,
"latencyMs": 1075
},
{
"questionId": "q50",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "26",
"actual": "22",
"correct": false,
"inputTokens": 2530,
"outputTokens": 2,
"latencyMs": 1868
},
{
"questionId": "q50",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "16",
"correct": false,
"inputTokens": 2982,
"outputTokens": 5,
"latencyMs": 1075
},
{
"questionId": "q50",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "26",
"actual": "24",
"correct": false,
"inputTokens": 2384,
"outputTokens": 2,
"latencyMs": 1973
},
{
"questionId": "q50",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "16",
"correct": false,
"inputTokens": 2856,
"outputTokens": 5,
"latencyMs": 947
},
{
"questionId": "q50",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "26",
"actual": "22",
"correct": false,
"inputTokens": 6319,
"outputTokens": 2,
"latencyMs": 1414
},
{
"questionId": "q50",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "16",
"correct": false,
"inputTokens": 6365,
"outputTokens": 5,
"latencyMs": 1221
},
{
"questionId": "q50",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "26",
"actual": "18",
"correct": false,
"inputTokens": 5015,
"outputTokens": 2,
"latencyMs": 1148
},
{
"questionId": "q50",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "26",
"actual": "20",
"correct": false,
"inputTokens": 5760,
"outputTokens": 5,
"latencyMs": 1286
},
{
"questionId": "q51",
"format": "json",
"model": "gpt-4o-mini",
"expected": "78",
"actual": "66",
"correct": false,
"inputTokens": 6387,
"outputTokens": 2,
"latencyMs": 2525
},
{
"questionId": "q51",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "81",
"correct": false,
"inputTokens": 7864,
"outputTokens": 5,
"latencyMs": 1613
},
{
"questionId": "q51",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "78",
"actual": "66",
"correct": false,
"inputTokens": 2524,
"outputTokens": 2,
"latencyMs": 1132
},
{
"questionId": "q51",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "78",
"correct": true,
"inputTokens": 2976,
"outputTokens": 5,
"latencyMs": 1104
},
{
"questionId": "q51",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "78",
"actual": "77",
"correct": false,
"inputTokens": 2378,
"outputTokens": 2,
"latencyMs": 1069
},
{
"questionId": "q51",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "73",
"correct": false,
"inputTokens": 2850,
"outputTokens": 5,
"latencyMs": 1113
},
{
"questionId": "q51",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "78",
"actual": "66",
"correct": false,
"inputTokens": 6313,
"outputTokens": 2,
"latencyMs": 1999
},
{
"questionId": "q51",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "78",
"correct": true,
"inputTokens": 6359,
"outputTokens": 5,
"latencyMs": 1214
},
{
"questionId": "q51",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "78",
"actual": "66",
"correct": false,
"inputTokens": 5009,
"outputTokens": 2,
"latencyMs": 1613
},
{
"questionId": "q51",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "78",
"actual": "77",
"correct": false,
"inputTokens": 5754,
"outputTokens": 5,
"latencyMs": 1012
},
{
"questionId": "q52",
"format": "json",
"model": "gpt-4o-mini",
"expected": "22",
"actual": "30",
"correct": false,
"inputTokens": 6387,
"outputTokens": 2,
"latencyMs": 1580
},
{
"questionId": "q52",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "15",
"correct": false,
"inputTokens": 7864,
"outputTokens": 5,
"latencyMs": 1688
},
{
"questionId": "q52",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "22",
"actual": "22",
"correct": true,
"inputTokens": 2524,
"outputTokens": 2,
"latencyMs": 1290
},
{
"questionId": "q52",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "16",
"correct": false,
"inputTokens": 2976,
"outputTokens": 5,
"latencyMs": 1121
},
{
"questionId": "q52",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "22",
"actual": "10",
"correct": false,
"inputTokens": 2378,
"outputTokens": 2,
"latencyMs": 1544
},
{
"questionId": "q52",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "20",
"correct": false,
"inputTokens": 2850,
"outputTokens": 5,
"latencyMs": 822
},
{
"questionId": "q52",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "22",
"actual": "34",
"correct": false,
"inputTokens": 6313,
"outputTokens": 2,
"latencyMs": 2718
},
{
"questionId": "q52",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "15",
"correct": false,
"inputTokens": 6359,
"outputTokens": 5,
"latencyMs": 1211
},
{
"questionId": "q52",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "22",
"actual": "34",
"correct": false,
"inputTokens": 5009,
"outputTokens": 2,
"latencyMs": 1162
},
{
"questionId": "q52",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "22",
"actual": "16",
"correct": false,
"inputTokens": 5754,
"outputTokens": 5,
"latencyMs": 1156
},
{
"questionId": "q53",
"format": "json",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "24",
"correct": false,
"inputTokens": 6395,
"outputTokens": 2,
"latencyMs": 1089
},
{
"questionId": "q53",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "9",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1368
},
{
"questionId": "q53",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "24",
"correct": false,
"inputTokens": 2532,
"outputTokens": 2,
"latencyMs": 1850
},
{
"questionId": "q53",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "9",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 914
},
{
"questionId": "q53",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "34",
"correct": false,
"inputTokens": 2386,
"outputTokens": 2,
"latencyMs": 1156
},
{
"questionId": "q53",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "10",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1118
},
{
"questionId": "q53",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "22",
"correct": false,
"inputTokens": 6321,
"outputTokens": 2,
"latencyMs": 1020
},
{
"questionId": "q53",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "8",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1021
},
{
"questionId": "q53",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "18",
"correct": false,
"inputTokens": 5017,
"outputTokens": 2,
"latencyMs": 1236
},
{
"questionId": "q53",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "10",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1574
},
{
"questionId": "q54",
"format": "json",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "24",
"correct": false,
"inputTokens": 6395,
"outputTokens": 2,
"latencyMs": 1437
},
{
"questionId": "q54",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "7",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1091
},
{
"questionId": "q54",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "24",
"correct": false,
"inputTokens": 2532,
"outputTokens": 2,
"latencyMs": 1917
},
{
"questionId": "q54",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "6",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1095
},
{
"questionId": "q54",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "34",
"correct": false,
"inputTokens": 2386,
"outputTokens": 2,
"latencyMs": 4230
},
{
"questionId": "q54",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1187
},
{
"questionId": "q54",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "24",
"correct": false,
"inputTokens": 6321,
"outputTokens": 2,
"latencyMs": 1197
},
{
"questionId": "q54",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "6",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1176
},
{
"questionId": "q54",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "18",
"correct": false,
"inputTokens": 5017,
"outputTokens": 2,
"latencyMs": 1249
},
{
"questionId": "q54",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1383
},
{
"questionId": "q55",
"format": "json",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "30",
"correct": false,
"inputTokens": 6395,
"outputTokens": 2,
"latencyMs": 1149
},
{
"questionId": "q55",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1072
},
{
"questionId": "q55",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "18",
"correct": false,
"inputTokens": 2532,
"outputTokens": 2,
"latencyMs": 1213
},
{
"questionId": "q55",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "7",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1507
},
{
"questionId": "q55",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "34",
"correct": false,
"inputTokens": 2386,
"outputTokens": 2,
"latencyMs": 1826
},
{
"questionId": "q55",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "8",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1162
},
{
"questionId": "q55",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "24",
"correct": false,
"inputTokens": 6321,
"outputTokens": 2,
"latencyMs": 1008
},
{
"questionId": "q55",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "7",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1285
},
{
"questionId": "q55",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "22",
"correct": false,
"inputTokens": 5017,
"outputTokens": 2,
"latencyMs": 1124
},
{
"questionId": "q55",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "9",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1212
},
{
"questionId": "q56",
"format": "json",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "22",
"correct": false,
"inputTokens": 6395,
"outputTokens": 2,
"latencyMs": 1232
},
{
"questionId": "q56",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "7",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1792
},
{
"questionId": "q56",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "12",
"correct": true,
"inputTokens": 2532,
"outputTokens": 2,
"latencyMs": 1357
},
{
"questionId": "q56",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "6",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1247
},
{
"questionId": "q56",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "22",
"correct": false,
"inputTokens": 2386,
"outputTokens": 2,
"latencyMs": 1043
},
{
"questionId": "q56",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "7",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1065
},
{
"questionId": "q56",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "10",
"correct": false,
"inputTokens": 6321,
"outputTokens": 2,
"latencyMs": 1298
},
{
"questionId": "q56",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "7",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1767
},
{
"questionId": "q56",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "12",
"actual": "10",
"correct": false,
"inputTokens": 5017,
"outputTokens": 2,
"latencyMs": 3525
},
{
"questionId": "q56",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "12",
"actual": "8",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1355
},
{
"questionId": "q57",
"format": "json",
"model": "gpt-4o-mini",
"expected": "62",
"actual": "54",
"correct": false,
"inputTokens": 6394,
"outputTokens": 2,
"latencyMs": 1359
},
{
"questionId": "q57",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1447
},
{
"questionId": "q57",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "62",
"actual": "54",
"correct": false,
"inputTokens": 2531,
"outputTokens": 2,
"latencyMs": 3832
},
{
"questionId": "q57",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 1143
},
{
"questionId": "q57",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "62",
"actual": "66",
"correct": false,
"inputTokens": 2385,
"outputTokens": 2,
"latencyMs": 1370
},
{
"questionId": "q57",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1042
},
{
"questionId": "q57",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "62",
"actual": "54",
"correct": false,
"inputTokens": 6320,
"outputTokens": 2,
"latencyMs": 1015
},
{
"questionId": "q57",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1395
},
{
"questionId": "q57",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "62",
"actual": "54",
"correct": false,
"inputTokens": 5016,
"outputTokens": 2,
"latencyMs": 1008
},
{
"questionId": "q57",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "62",
"actual": "62",
"correct": true,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1191
},
{
"questionId": "q58",
"format": "json",
"model": "gpt-4o-mini",
"expected": "45",
"actual": "38",
"correct": false,
"inputTokens": 6394,
"outputTokens": 2,
"latencyMs": 1304
},
{
"questionId": "q58",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 7872,
"outputTokens": 5,
"latencyMs": 1386
},
{
"questionId": "q58",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "45",
"actual": "38",
"correct": false,
"inputTokens": 2531,
"outputTokens": 2,
"latencyMs": 1433
},
{
"questionId": "q58",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 2984,
"outputTokens": 5,
"latencyMs": 967
},
{
"questionId": "q58",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 2385,
"outputTokens": 2,
"latencyMs": 2469
},
{
"questionId": "q58",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 2858,
"outputTokens": 5,
"latencyMs": 1382
},
{
"questionId": "q58",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "45",
"actual": "38",
"correct": false,
"inputTokens": 6320,
"outputTokens": 2,
"latencyMs": 1658
},
{
"questionId": "q58",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "42",
"correct": false,
"inputTokens": 6367,
"outputTokens": 5,
"latencyMs": 1450
},
{
"questionId": "q58",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "45",
"actual": "38",
"correct": false,
"inputTokens": 5016,
"outputTokens": 2,
"latencyMs": 1428
},
{
"questionId": "q58",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "45",
"actual": "38",
"correct": false,
"inputTokens": 5762,
"outputTokens": 5,
"latencyMs": 1144
},
{
"questionId": "q59",
"format": "json",
"model": "gpt-4o-mini",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 1577
},
{
"questionId": "q59",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1181
},
{
"questionId": "q59",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 1231
},
{
"questionId": "q59",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1407
},
{
"questionId": "q59",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1393
},
{
"questionId": "q59",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1534
},
{
"questionId": "q59",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 1456
},
{
"questionId": "q59",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1933
},
{
"questionId": "q59",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 1472
},
{
"questionId": "q59",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "96.17",
"actual": "96.17",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1224
},
{
"questionId": "q60",
"format": "json",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9739,
"outputTokens": 3,
"latencyMs": 2069
},
{
"questionId": "q60",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1172
},
{
"questionId": "q60",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6013,
"outputTokens": 3,
"latencyMs": 1236
},
{
"questionId": "q60",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1157
},
{
"questionId": "q60",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6781,
"outputTokens": 3,
"latencyMs": 1364
},
{
"questionId": "q60",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1041
},
{
"questionId": "q60",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9158,
"outputTokens": 3,
"latencyMs": 1478
},
{
"questionId": "q60",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1266
},
{
"questionId": "q60",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 7373,
"outputTokens": 3,
"latencyMs": 3477
},
{
"questionId": "q60",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 2630
},
{
"questionId": "q61",
"format": "json",
"model": "gpt-4o-mini",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 1479
},
{
"questionId": "q61",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1270
},
{
"questionId": "q61",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 1270
},
{
"questionId": "q61",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1342
},
{
"questionId": "q61",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1350
},
{
"questionId": "q61",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1205
},
{
"questionId": "q61",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 1502
},
{
"questionId": "q61",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1571
},
{
"questionId": "q61",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 2013
},
{
"questionId": "q61",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "599.39",
"actual": "599.39",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1428
},
{
"questionId": "q62",
"format": "json",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9739,
"outputTokens": 2,
"latencyMs": 1666
},
{
"questionId": "q62",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1549
},
{
"questionId": "q62",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6013,
"outputTokens": 2,
"latencyMs": 1033
},
{
"questionId": "q62",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1061
},
{
"questionId": "q62",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6781,
"outputTokens": 2,
"latencyMs": 2008
},
{
"questionId": "q62",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1214
},
{
"questionId": "q62",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9158,
"outputTokens": 2,
"latencyMs": 1321
},
{
"questionId": "q62",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1311
},
{
"questionId": "q62",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 7373,
"outputTokens": 2,
"latencyMs": 1769
},
{
"questionId": "q62",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1157
},
{
"questionId": "q63",
"format": "json",
"model": "gpt-4o-mini",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 1213
},
{
"questionId": "q63",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1332
},
{
"questionId": "q63",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 3749
},
{
"questionId": "q63",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1326
},
{
"questionId": "q63",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 947
},
{
"questionId": "q63",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1251
},
{
"questionId": "q63",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 1428
},
{
"questionId": "q63",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1659
},
{
"questionId": "q63",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 5584
},
{
"questionId": "q63",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "528.71",
"actual": "528.71",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1251
},
{
"questionId": "q64",
"format": "json",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9739,
"outputTokens": 2,
"latencyMs": 2425
},
{
"questionId": "q64",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1481
},
{
"questionId": "q64",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6013,
"outputTokens": 2,
"latencyMs": 1109
},
{
"questionId": "q64",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1048
},
{
"questionId": "q64",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6781,
"outputTokens": 2,
"latencyMs": 1256
},
{
"questionId": "q64",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1117
},
{
"questionId": "q64",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9158,
"outputTokens": 2,
"latencyMs": 1168
},
{
"questionId": "q64",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1504
},
{
"questionId": "q64",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 7373,
"outputTokens": 2,
"latencyMs": 1134
},
{
"questionId": "q64",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1059
},
{
"questionId": "q65",
"format": "json",
"model": "gpt-4o-mini",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 9740,
"outputTokens": 5,
"latencyMs": 2361
},
{
"questionId": "q65",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1158
},
{
"questionId": "q65",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 6014,
"outputTokens": 5,
"latencyMs": 1493
},
{
"questionId": "q65",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1068
},
{
"questionId": "q65",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 6782,
"outputTokens": 5,
"latencyMs": 1490
},
{
"questionId": "q65",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1386
},
{
"questionId": "q65",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 9159,
"outputTokens": 5,
"latencyMs": 1470
},
{
"questionId": "q65",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 9289,
"outputTokens": 8,
"latencyMs": 1189
},
{
"questionId": "q65",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 7374,
"outputTokens": 5,
"latencyMs": 2824
},
{
"questionId": "q65",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1687.82",
"actual": "1687.82",
"correct": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1565
},
{
"questionId": "q66",
"format": "json",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9739,
"outputTokens": 3,
"latencyMs": 1480
},
{
"questionId": "q66",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1354
},
{
"questionId": "q66",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6013,
"outputTokens": 3,
"latencyMs": 5334
},
{
"questionId": "q66",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1158
},
{
"questionId": "q66",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6781,
"outputTokens": 3,
"latencyMs": 2043
},
{
"questionId": "q66",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1302
},
{
"questionId": "q66",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9158,
"outputTokens": 3,
"latencyMs": 1006
},
{
"questionId": "q66",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1106
},
{
"questionId": "q66",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 7373,
"outputTokens": 3,
"latencyMs": 1801
},
{
"questionId": "q66",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1626
},
{
"questionId": "q67",
"format": "json",
"model": "gpt-4o-mini",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 2107
},
{
"questionId": "q67",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1183
},
{
"questionId": "q67",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 7091
},
{
"questionId": "q67",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1730
},
{
"questionId": "q67",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1222
},
{
"questionId": "q67",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1447
},
{
"questionId": "q67",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 10295
},
{
"questionId": "q67",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1228
},
{
"questionId": "q67",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 1748
},
{
"questionId": "q67",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "423.6",
"actual": "423.6",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1373
},
{
"questionId": "q68",
"format": "json",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9739,
"outputTokens": 3,
"latencyMs": 3836
},
{
"questionId": "q68",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1297
},
{
"questionId": "q68",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6013,
"outputTokens": 3,
"latencyMs": 1927
},
{
"questionId": "q68",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1171
},
{
"questionId": "q68",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6781,
"outputTokens": 3,
"latencyMs": 1551
},
{
"questionId": "q68",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1273
},
{
"questionId": "q68",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9158,
"outputTokens": 3,
"latencyMs": 1387
},
{
"questionId": "q68",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1237
},
{
"questionId": "q68",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 7373,
"outputTokens": 3,
"latencyMs": 1934
},
{
"questionId": "q68",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1132
},
{
"questionId": "q69",
"format": "json",
"model": "gpt-4o-mini",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 2267
},
{
"questionId": "q69",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1772
},
{
"questionId": "q69",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 1315
},
{
"questionId": "q69",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1165
},
{
"questionId": "q69",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1097
},
{
"questionId": "q69",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1299
},
{
"questionId": "q69",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 1779
},
{
"questionId": "q69",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 3153
},
{
"questionId": "q69",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 1813
},
{
"questionId": "q69",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "784.03",
"actual": "784.03",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1867
},
{
"questionId": "q70",
"format": "json",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9739,
"outputTokens": 3,
"latencyMs": 1611
},
{
"questionId": "q70",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1173
},
{
"questionId": "q70",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6013,
"outputTokens": 3,
"latencyMs": 1977
},
{
"questionId": "q70",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1108
},
{
"questionId": "q70",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 6781,
"outputTokens": 3,
"latencyMs": 1324
},
{
"questionId": "q70",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1225
},
{
"questionId": "q70",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9158,
"outputTokens": 3,
"latencyMs": 1416
},
{
"questionId": "q70",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1200
},
{
"questionId": "q70",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 7373,
"outputTokens": 3,
"latencyMs": 1259
},
{
"questionId": "q70",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "shipped",
"actual": "shipped",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1433
},
{
"questionId": "q71",
"format": "json",
"model": "gpt-4o-mini",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 1729
},
{
"questionId": "q71",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1143
},
{
"questionId": "q71",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 1837
},
{
"questionId": "q71",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1147
},
{
"questionId": "q71",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1777
},
{
"questionId": "q71",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1295
},
{
"questionId": "q71",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 1081
},
{
"questionId": "q71",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1692
},
{
"questionId": "q71",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 1661
},
{
"questionId": "q71",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "645.88",
"actual": "645.88",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1475
},
{
"questionId": "q72",
"format": "json",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9739,
"outputTokens": 2,
"latencyMs": 2979
},
{
"questionId": "q72",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1187
},
{
"questionId": "q72",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6013,
"outputTokens": 2,
"latencyMs": 1620
},
{
"questionId": "q72",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1532
},
{
"questionId": "q72",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 6781,
"outputTokens": 2,
"latencyMs": 1616
},
{
"questionId": "q72",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1435
},
{
"questionId": "q72",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9158,
"outputTokens": 2,
"latencyMs": 1190
},
{
"questionId": "q72",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1414
},
{
"questionId": "q72",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 7373,
"outputTokens": 2,
"latencyMs": 2335
},
{
"questionId": "q72",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "processing",
"actual": "processing",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1308
},
{
"questionId": "q73",
"format": "json",
"model": "gpt-4o-mini",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 3359
},
{
"questionId": "q73",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1227
},
{
"questionId": "q73",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 1439
},
{
"questionId": "q73",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1179
},
{
"questionId": "q73",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1064
},
{
"questionId": "q73",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1144
},
{
"questionId": "q73",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 1873
},
{
"questionId": "q73",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1302
},
{
"questionId": "q73",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 1956
},
{
"questionId": "q73",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "371.91",
"actual": "371.91",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1281
},
{
"questionId": "q74",
"format": "json",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9739,
"outputTokens": 2,
"latencyMs": 1591
},
{
"questionId": "q74",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1279
},
{
"questionId": "q74",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6013,
"outputTokens": 2,
"latencyMs": 3152
},
{
"questionId": "q74",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1061
},
{
"questionId": "q74",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 6781,
"outputTokens": 2,
"latencyMs": 1557
},
{
"questionId": "q74",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1313
},
{
"questionId": "q74",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9158,
"outputTokens": 2,
"latencyMs": 1433
},
{
"questionId": "q74",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1812
},
{
"questionId": "q74",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 7373,
"outputTokens": 2,
"latencyMs": 1024
},
{
"questionId": "q74",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "pending",
"actual": "pending",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1243
},
{
"questionId": "q75",
"format": "json",
"model": "gpt-4o-mini",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 9740,
"outputTokens": 3,
"latencyMs": 1500
},
{
"questionId": "q75",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 11907,
"outputTokens": 6,
"latencyMs": 1275
},
{
"questionId": "q75",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 6014,
"outputTokens": 3,
"latencyMs": 1841
},
{
"questionId": "q75",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 6993,
"outputTokens": 6,
"latencyMs": 1080
},
{
"questionId": "q75",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 6782,
"outputTokens": 3,
"latencyMs": 1209
},
{
"questionId": "q75",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 8414,
"outputTokens": 6,
"latencyMs": 1308
},
{
"questionId": "q75",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 9159,
"outputTokens": 3,
"latencyMs": 1556
},
{
"questionId": "q75",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 9289,
"outputTokens": 6,
"latencyMs": 1240
},
{
"questionId": "q75",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 7374,
"outputTokens": 3,
"latencyMs": 1254
},
{
"questionId": "q75",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1066",
"actual": "1066",
"correct": true,
"inputTokens": 8385,
"outputTokens": 6,
"latencyMs": 1305
},
{
"questionId": "q76",
"format": "json",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9739,
"outputTokens": 3,
"latencyMs": 2606
},
{
"questionId": "q76",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1422
},
{
"questionId": "q76",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6013,
"outputTokens": 3,
"latencyMs": 2688
},
{
"questionId": "q76",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1041
},
{
"questionId": "q76",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 6781,
"outputTokens": 3,
"latencyMs": 3070
},
{
"questionId": "q76",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1167
},
{
"questionId": "q76",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9158,
"outputTokens": 3,
"latencyMs": 1702
},
{
"questionId": "q76",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1182
},
{
"questionId": "q76",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 7373,
"outputTokens": 3,
"latencyMs": 1740
},
{
"questionId": "q76",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "cancelled",
"actual": "cancelled",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1404
},
{
"questionId": "q77",
"format": "json",
"model": "gpt-4o-mini",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 9740,
"outputTokens": 5,
"latencyMs": 1596
},
{
"questionId": "q77",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 2314
},
{
"questionId": "q77",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 6014,
"outputTokens": 5,
"latencyMs": 1114
},
{
"questionId": "q77",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1289
},
{
"questionId": "q77",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 6782,
"outputTokens": 5,
"latencyMs": 2428
},
{
"questionId": "q77",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1325
},
{
"questionId": "q77",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 9159,
"outputTokens": 5,
"latencyMs": 1343
},
{
"questionId": "q77",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 9289,
"outputTokens": 8,
"latencyMs": 1783
},
{
"questionId": "q77",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 7374,
"outputTokens": 5,
"latencyMs": 918
},
{
"questionId": "q77",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1697.4",
"actual": "1697.4",
"correct": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1308
},
{
"questionId": "q78",
"format": "json",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9739,
"outputTokens": 3,
"latencyMs": 1396
},
{
"questionId": "q78",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 11906,
"outputTokens": 4,
"latencyMs": 1225
},
{
"questionId": "q78",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6013,
"outputTokens": 3,
"latencyMs": 2294
},
{
"questionId": "q78",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6992,
"outputTokens": 4,
"latencyMs": 1418
},
{
"questionId": "q78",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 6781,
"outputTokens": 3,
"latencyMs": 1613
},
{
"questionId": "q78",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 8413,
"outputTokens": 4,
"latencyMs": 1374
},
{
"questionId": "q78",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9158,
"outputTokens": 3,
"latencyMs": 1341
},
{
"questionId": "q78",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 9288,
"outputTokens": 4,
"latencyMs": 1223
},
{
"questionId": "q78",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 7373,
"outputTokens": 3,
"latencyMs": 2230
},
{
"questionId": "q78",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "delivered",
"actual": "delivered",
"correct": true,
"inputTokens": 8384,
"outputTokens": 4,
"latencyMs": 1425
},
{
"questionId": "q79",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 1377
},
{
"questionId": "q79",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1550
},
{
"questionId": "q79",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 1394
},
{
"questionId": "q79",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1202
},
{
"questionId": "q79",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1435
},
{
"questionId": "q79",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1277
},
{
"questionId": "q79",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 1564
},
{
"questionId": "q79",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 1200
},
{
"questionId": "q79",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 1596
},
{
"questionId": "q79",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Valerie Braun",
"actual": "Valerie Braun",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1151
},
{
"questionId": "q80",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 9740,
"outputTokens": 5,
"latencyMs": 1458
},
{
"questionId": "q80",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1283
},
{
"questionId": "q80",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 6014,
"outputTokens": 5,
"latencyMs": 4702
},
{
"questionId": "q80",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1360
},
{
"questionId": "q80",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 6782,
"outputTokens": 5,
"latencyMs": 6167
},
{
"questionId": "q80",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1449
},
{
"questionId": "q80",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 9159,
"outputTokens": 5,
"latencyMs": 6096
},
{
"questionId": "q80",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 1194
},
{
"questionId": "q80",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 7374,
"outputTokens": 5,
"latencyMs": 7357
},
{
"questionId": "q80",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Anita Kozey",
"actual": "Anita Kozey",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1213
},
{
"questionId": "q81",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 9740,
"outputTokens": 6,
"latencyMs": 2539
},
{
"questionId": "q81",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1532
},
{
"questionId": "q81",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 6014,
"outputTokens": 6,
"latencyMs": 2960
},
{
"questionId": "q81",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1547
},
{
"questionId": "q81",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 6782,
"outputTokens": 6,
"latencyMs": 1358
},
{
"questionId": "q81",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1424
},
{
"questionId": "q81",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 9159,
"outputTokens": 6,
"latencyMs": 958
},
{
"questionId": "q81",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 9289,
"outputTokens": 10,
"latencyMs": 1381
},
{
"questionId": "q81",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 7374,
"outputTokens": 6,
"latencyMs": 1372
},
{
"questionId": "q81",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Elmer Kub PhD",
"actual": "Elmer Kub PhD",
"correct": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1715
},
{
"questionId": "q82",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 9740,
"outputTokens": 5,
"latencyMs": 1972
},
{
"questionId": "q82",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1315
},
{
"questionId": "q82",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 6014,
"outputTokens": 5,
"latencyMs": 1634
},
{
"questionId": "q82",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1264
},
{
"questionId": "q82",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 6782,
"outputTokens": 5,
"latencyMs": 1153
},
{
"questionId": "q82",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1252
},
{
"questionId": "q82",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 9159,
"outputTokens": 5,
"latencyMs": 1697
},
{
"questionId": "q82",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 9289,
"outputTokens": 10,
"latencyMs": 1198
},
{
"questionId": "q82",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 7374,
"outputTokens": 5,
"latencyMs": 1854
},
{
"questionId": "q82",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Maxine Zemlak",
"actual": "Maxine Zemlak",
"correct": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1752
},
{
"questionId": "q83",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 9740,
"outputTokens": 5,
"latencyMs": 2076
},
{
"questionId": "q83",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1398
},
{
"questionId": "q83",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 6014,
"outputTokens": 5,
"latencyMs": 2263
},
{
"questionId": "q83",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 3101
},
{
"questionId": "q83",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 6782,
"outputTokens": 5,
"latencyMs": 1453
},
{
"questionId": "q83",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1265
},
{
"questionId": "q83",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 9159,
"outputTokens": 5,
"latencyMs": 8807
},
{
"questionId": "q83",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1097
},
{
"questionId": "q83",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 7374,
"outputTokens": 5,
"latencyMs": 1667
},
{
"questionId": "q83",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Emanuel Littel",
"actual": "Emanuel Littel",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1198
},
{
"questionId": "q84",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 9740,
"outputTokens": 3,
"latencyMs": 2292
},
{
"questionId": "q84",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1202
},
{
"questionId": "q84",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 6014,
"outputTokens": 3,
"latencyMs": 1801
},
{
"questionId": "q84",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1287
},
{
"questionId": "q84",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 6782,
"outputTokens": 3,
"latencyMs": 1340
},
{
"questionId": "q84",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1163
},
{
"questionId": "q84",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 9159,
"outputTokens": 3,
"latencyMs": 2685
},
{
"questionId": "q84",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1397
},
{
"questionId": "q84",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 7374,
"outputTokens": 3,
"latencyMs": 1289
},
{
"questionId": "q84",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Andrew Kling",
"actual": "Andrew Kling",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1155
},
{
"questionId": "q85",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 9740,
"outputTokens": 6,
"latencyMs": 1601
},
{
"questionId": "q85",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1340
},
{
"questionId": "q85",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 6014,
"outputTokens": 6,
"latencyMs": 3525
},
{
"questionId": "q85",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1710
},
{
"questionId": "q85",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 6782,
"outputTokens": 6,
"latencyMs": 2333
},
{
"questionId": "q85",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1168
},
{
"questionId": "q85",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 9159,
"outputTokens": 6,
"latencyMs": 1781
},
{
"questionId": "q85",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 1552
},
{
"questionId": "q85",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 7374,
"outputTokens": 6,
"latencyMs": 1584
},
{
"questionId": "q85",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Morris O'Hara",
"actual": "Morris O'Hara",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1548
},
{
"questionId": "q86",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 9740,
"outputTokens": 6,
"latencyMs": 7230
},
{
"questionId": "q86",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1933
},
{
"questionId": "q86",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 6014,
"outputTokens": 6,
"latencyMs": 1067
},
{
"questionId": "q86",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1288
},
{
"questionId": "q86",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 6782,
"outputTokens": 6,
"latencyMs": 3954
},
{
"questionId": "q86",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1314
},
{
"questionId": "q86",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 9159,
"outputTokens": 6,
"latencyMs": 1334
},
{
"questionId": "q86",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 2441
},
{
"questionId": "q86",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 7374,
"outputTokens": 6,
"latencyMs": 1650
},
{
"questionId": "q86",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Elijah Franecki",
"actual": "Elijah Franecki",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1495
},
{
"questionId": "q87",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 9740,
"outputTokens": 5,
"latencyMs": 1262
},
{
"questionId": "q87",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 11907,
"outputTokens": 7,
"latencyMs": 1367
},
{
"questionId": "q87",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 6014,
"outputTokens": 5,
"latencyMs": 1385
},
{
"questionId": "q87",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 6993,
"outputTokens": 7,
"latencyMs": 1313
},
{
"questionId": "q87",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 6782,
"outputTokens": 5,
"latencyMs": 1141
},
{
"questionId": "q87",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 8414,
"outputTokens": 7,
"latencyMs": 1300
},
{
"questionId": "q87",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 9159,
"outputTokens": 5,
"latencyMs": 3347
},
{
"questionId": "q87",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 9289,
"outputTokens": 7,
"latencyMs": 1457
},
{
"questionId": "q87",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 7374,
"outputTokens": 5,
"latencyMs": 1276
},
{
"questionId": "q87",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Malcolm Erdman",
"actual": "Malcolm Erdman",
"correct": true,
"inputTokens": 8385,
"outputTokens": 7,
"latencyMs": 1211
},
{
"questionId": "q88",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 9740,
"outputTokens": 5,
"latencyMs": 1635
},
{
"questionId": "q88",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 11907,
"outputTokens": 9,
"latencyMs": 1582
},
{
"questionId": "q88",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 6014,
"outputTokens": 5,
"latencyMs": 1695
},
{
"questionId": "q88",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 6993,
"outputTokens": 9,
"latencyMs": 1318
},
{
"questionId": "q88",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 6782,
"outputTokens": 5,
"latencyMs": 936
},
{
"questionId": "q88",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 8414,
"outputTokens": 9,
"latencyMs": 1204
},
{
"questionId": "q88",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 9159,
"outputTokens": 5,
"latencyMs": 996
},
{
"questionId": "q88",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 9289,
"outputTokens": 9,
"latencyMs": 1261
},
{
"questionId": "q88",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 7374,
"outputTokens": 5,
"latencyMs": 2276
},
{
"questionId": "q88",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Fannie Skiles",
"actual": "Fannie Skiles",
"correct": true,
"inputTokens": 8385,
"outputTokens": 9,
"latencyMs": 1380
},
{
"questionId": "q89",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 9740,
"outputTokens": 6,
"latencyMs": 1451
},
{
"questionId": "q89",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1977
},
{
"questionId": "q89",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 6014,
"outputTokens": 6,
"latencyMs": 1376
},
{
"questionId": "q89",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1250
},
{
"questionId": "q89",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 6782,
"outputTokens": 6,
"latencyMs": 1273
},
{
"questionId": "q89",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1359
},
{
"questionId": "q89",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 9159,
"outputTokens": 6,
"latencyMs": 1791
},
{
"questionId": "q89",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 9289,
"outputTokens": 10,
"latencyMs": 1273
},
{
"questionId": "q89",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 7374,
"outputTokens": 6,
"latencyMs": 2832
},
{
"questionId": "q89",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Sonja Emmerich",
"actual": "Sonja Emmerich",
"correct": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1172
},
{
"questionId": "q90",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 9740,
"outputTokens": 7,
"latencyMs": 1491
},
{
"questionId": "q90",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 11907,
"outputTokens": 10,
"latencyMs": 1414
},
{
"questionId": "q90",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 6014,
"outputTokens": 7,
"latencyMs": 1396
},
{
"questionId": "q90",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 6993,
"outputTokens": 10,
"latencyMs": 1514
},
{
"questionId": "q90",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 6782,
"outputTokens": 7,
"latencyMs": 1573
},
{
"questionId": "q90",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 8414,
"outputTokens": 10,
"latencyMs": 1284
},
{
"questionId": "q90",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 9159,
"outputTokens": 7,
"latencyMs": 5400
},
{
"questionId": "q90",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 9289,
"outputTokens": 10,
"latencyMs": 1486
},
{
"questionId": "q90",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 7374,
"outputTokens": 7,
"latencyMs": 1420
},
{
"questionId": "q90",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Frank Emmerich DVM",
"actual": "Frank Emmerich DVM",
"correct": true,
"inputTokens": 8385,
"outputTokens": 10,
"latencyMs": 1410
},
{
"questionId": "q91",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 1248
},
{
"questionId": "q91",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 11907,
"outputTokens": 5,
"latencyMs": 1177
},
{
"questionId": "q91",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 1601
},
{
"questionId": "q91",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 6993,
"outputTokens": 5,
"latencyMs": 1822
},
{
"questionId": "q91",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1103
},
{
"questionId": "q91",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 8414,
"outputTokens": 5,
"latencyMs": 1247
},
{
"questionId": "q91",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 1184
},
{
"questionId": "q91",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 9289,
"outputTokens": 5,
"latencyMs": 1137
},
{
"questionId": "q91",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 949
},
{
"questionId": "q91",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Ronald Collins",
"actual": "Ronald Collins",
"correct": true,
"inputTokens": 8385,
"outputTokens": 5,
"latencyMs": 1143
},
{
"questionId": "q92",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 9740,
"outputTokens": 4,
"latencyMs": 1021
},
{
"questionId": "q92",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1301
},
{
"questionId": "q92",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 6014,
"outputTokens": 4,
"latencyMs": 1254
},
{
"questionId": "q92",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1375
},
{
"questionId": "q92",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 6782,
"outputTokens": 4,
"latencyMs": 1316
},
{
"questionId": "q92",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 2681
},
{
"questionId": "q92",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 9159,
"outputTokens": 4,
"latencyMs": 2427
},
{
"questionId": "q92",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 9289,
"outputTokens": 8,
"latencyMs": 1526
},
{
"questionId": "q92",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 7374,
"outputTokens": 4,
"latencyMs": 1252
},
{
"questionId": "q92",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Jeannie Klein",
"actual": "Jeannie Klein",
"correct": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1324
},
{
"questionId": "q93",
"format": "json",
"model": "gpt-4o-mini",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 9740,
"outputTokens": 5,
"latencyMs": 1606
},
{
"questionId": "q93",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 11907,
"outputTokens": 8,
"latencyMs": 1223
},
{
"questionId": "q93",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 6014,
"outputTokens": 5,
"latencyMs": 1965
},
{
"questionId": "q93",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 6993,
"outputTokens": 8,
"latencyMs": 1300
},
{
"questionId": "q93",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 6782,
"outputTokens": 5,
"latencyMs": 1110
},
{
"questionId": "q93",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 8414,
"outputTokens": 8,
"latencyMs": 1819
},
{
"questionId": "q93",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 9159,
"outputTokens": 5,
"latencyMs": 1010
},
{
"questionId": "q93",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 9289,
"outputTokens": 8,
"latencyMs": 1224
},
{
"questionId": "q93",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 7374,
"outputTokens": 5,
"latencyMs": 1430
},
{
"questionId": "q93",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "Joshua Watsica",
"actual": "Joshua Watsica",
"correct": true,
"inputTokens": 8385,
"outputTokens": 8,
"latencyMs": 1158
},
{
"questionId": "q94",
"format": "json",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9736,
"outputTokens": 2,
"latencyMs": 1352
},
{
"questionId": "q94",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1498
},
{
"questionId": "q94",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "12",
"correct": false,
"inputTokens": 6010,
"outputTokens": 2,
"latencyMs": 1249
},
{
"questionId": "q94",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1080
},
{
"questionId": "q94",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "12",
"correct": false,
"inputTokens": 6778,
"outputTokens": 2,
"latencyMs": 1760
},
{
"questionId": "q94",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1156
},
{
"questionId": "q94",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9155,
"outputTokens": 2,
"latencyMs": 9923
},
{
"questionId": "q94",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1138
},
{
"questionId": "q94",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "12",
"correct": false,
"inputTokens": 7370,
"outputTokens": 2,
"latencyMs": 1070
},
{
"questionId": "q94",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1114
},
{
"questionId": "q95",
"format": "json",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9736,
"outputTokens": 2,
"latencyMs": 830
},
{
"questionId": "q95",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1085
},
{
"questionId": "q95",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6010,
"outputTokens": 2,
"latencyMs": 2362
},
{
"questionId": "q95",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"correct": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1198
},
{
"questionId": "q95",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6778,
"outputTokens": 2,
"latencyMs": 1630
},
{
"questionId": "q95",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1219
},
{
"questionId": "q95",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9155,
"outputTokens": 2,
"latencyMs": 2666
},
{
"questionId": "q95",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1044
},
{
"questionId": "q95",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "12",
"correct": false,
"inputTokens": 7370,
"outputTokens": 2,
"latencyMs": 2187
},
{
"questionId": "q95",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1313
},
{
"questionId": "q96",
"format": "json",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "20",
"correct": false,
"inputTokens": 9737,
"outputTokens": 2,
"latencyMs": 1087
},
{
"questionId": "q96",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1292
},
{
"questionId": "q96",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "15",
"correct": false,
"inputTokens": 6011,
"outputTokens": 2,
"latencyMs": 1979
},
{
"questionId": "q96",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"correct": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1095
},
{
"questionId": "q96",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "15",
"correct": false,
"inputTokens": 6779,
"outputTokens": 2,
"latencyMs": 1385
},
{
"questionId": "q96",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1507
},
{
"questionId": "q96",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9156,
"outputTokens": 2,
"latencyMs": 1579
},
{
"questionId": "q96",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1365
},
{
"questionId": "q96",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "20",
"correct": false,
"inputTokens": 7371,
"outputTokens": 2,
"latencyMs": 1661
},
{
"questionId": "q96",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "7",
"correct": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1423
},
{
"questionId": "q97",
"format": "json",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "15",
"correct": false,
"inputTokens": 9737,
"outputTokens": 2,
"latencyMs": 1815
},
{
"questionId": "q97",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1345
},
{
"questionId": "q97",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6011,
"outputTokens": 2,
"latencyMs": 2193
},
{
"questionId": "q97",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1417
},
{
"questionId": "q97",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "15",
"correct": false,
"inputTokens": 6779,
"outputTokens": 2,
"latencyMs": 1721
},
{
"questionId": "q97",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1114
},
{
"questionId": "q97",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "15",
"correct": false,
"inputTokens": 9156,
"outputTokens": 2,
"latencyMs": 2208
},
{
"questionId": "q97",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1895
},
{
"questionId": "q97",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "15",
"correct": false,
"inputTokens": 7371,
"outputTokens": 2,
"latencyMs": 1287
},
{
"questionId": "q97",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1281
},
{
"questionId": "q98",
"format": "json",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9737,
"outputTokens": 2,
"latencyMs": 1387
},
{
"questionId": "q98",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 11902,
"outputTokens": 5,
"latencyMs": 1243
},
{
"questionId": "q98",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 6011,
"outputTokens": 2,
"latencyMs": 1284
},
{
"questionId": "q98",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 6988,
"outputTokens": 5,
"latencyMs": 1161
},
{
"questionId": "q98",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "15",
"correct": false,
"inputTokens": 6779,
"outputTokens": 2,
"latencyMs": 10406
},
{
"questionId": "q98",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 8409,
"outputTokens": 5,
"latencyMs": 1335
},
{
"questionId": "q98",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9156,
"outputTokens": 2,
"latencyMs": 1517
},
{
"questionId": "q98",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 9284,
"outputTokens": 5,
"latencyMs": 1702
},
{
"questionId": "q98",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "10",
"actual": "10",
"correct": true,
"inputTokens": 7371,
"outputTokens": 2,
"latencyMs": 1676
},
{
"questionId": "q98",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "10",
"actual": "8",
"correct": false,
"inputTokens": 8380,
"outputTokens": 5,
"latencyMs": 1218
},
{
"questionId": "q99",
"format": "json",
"model": "gpt-4o-mini",
"expected": "42342.25",
"actual": "$50,000.00",
"correct": false,
"inputTokens": 9737,
"outputTokens": 7,
"latencyMs": 1407
},
{
"questionId": "q99",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "50,847.47",
"correct": false,
"inputTokens": 11902,
"outputTokens": 9,
"latencyMs": 1443
},
{
"questionId": "q99",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "42342.25",
"actual": "Total revenue across all orders is 42,195.36.",
"correct": false,
"inputTokens": 6011,
"outputTokens": 14,
"latencyMs": 1150
},
{
"questionId": "q99",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "41,847.47",
"correct": false,
"inputTokens": 6988,
"outputTokens": 9,
"latencyMs": 1774
},
{
"questionId": "q99",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "42342.25",
"actual": "$32,186.73",
"correct": false,
"inputTokens": 6779,
"outputTokens": 7,
"latencyMs": 2654
},
{
"questionId": "q99",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "48,847.47",
"correct": false,
"inputTokens": 8409,
"outputTokens": 9,
"latencyMs": 1386
},
{
"questionId": "q99",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "42342.25",
"actual": "$34,186.73",
"correct": false,
"inputTokens": 9156,
"outputTokens": 7,
"latencyMs": 1506
},
{
"questionId": "q99",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "48,847.47",
"correct": false,
"inputTokens": 9284,
"outputTokens": 9,
"latencyMs": 1509
},
{
"questionId": "q99",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "42342.25",
"actual": "Total revenue across all orders is 48780.73.",
"correct": false,
"inputTokens": 7371,
"outputTokens": 13,
"latencyMs": 1700
},
{
"questionId": "q99",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "42342.25",
"actual": "47,847.47",
"correct": false,
"inputTokens": 8380,
"outputTokens": 9,
"latencyMs": 1230
},
{
"questionId": "q100",
"format": "json",
"model": "gpt-4o-mini",
"expected": "44",
"actual": "36",
"correct": false,
"inputTokens": 9739,
"outputTokens": 2,
"latencyMs": 1725
},
{
"questionId": "q100",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "48",
"correct": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1377
},
{
"questionId": "q100",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "44",
"actual": "34",
"correct": false,
"inputTokens": 6013,
"outputTokens": 2,
"latencyMs": 1399
},
{
"questionId": "q100",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "47",
"correct": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 1094
},
{
"questionId": "q100",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "44",
"actual": "34",
"correct": false,
"inputTokens": 6781,
"outputTokens": 2,
"latencyMs": 1617
},
{
"questionId": "q100",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "47",
"correct": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 1344
},
{
"questionId": "q100",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "44",
"actual": "36",
"correct": false,
"inputTokens": 9158,
"outputTokens": 2,
"latencyMs": 2396
},
{
"questionId": "q100",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "48",
"correct": false,
"inputTokens": 9286,
"outputTokens": 5,
"latencyMs": 1145
},
{
"questionId": "q100",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "44",
"actual": "36",
"correct": false,
"inputTokens": 7373,
"outputTokens": 2,
"latencyMs": 951
},
{
"questionId": "q100",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "44",
"actual": "45",
"correct": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 1311
},
{
"questionId": "q101",
"format": "json",
"model": "gpt-4o-mini",
"expected": "39",
"actual": "34",
"correct": false,
"inputTokens": 9739,
"outputTokens": 2,
"latencyMs": 866
},
{
"questionId": "q101",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1964
},
{
"questionId": "q101",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "39",
"actual": "30",
"correct": false,
"inputTokens": 6013,
"outputTokens": 2,
"latencyMs": 1994
},
{
"questionId": "q101",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 1277
},
{
"questionId": "q101",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "39",
"actual": "32",
"correct": false,
"inputTokens": 6781,
"outputTokens": 2,
"latencyMs": 1884
},
{
"questionId": "q101",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 1282
},
{
"questionId": "q101",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "39",
"actual": "32",
"correct": false,
"inputTokens": 9158,
"outputTokens": 2,
"latencyMs": 1761
},
{
"questionId": "q101",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 9286,
"outputTokens": 5,
"latencyMs": 1250
},
{
"questionId": "q101",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "39",
"actual": "32",
"correct": false,
"inputTokens": 7373,
"outputTokens": 2,
"latencyMs": 1316
},
{
"questionId": "q101",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "39",
"actual": "38",
"correct": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 1373
},
{
"questionId": "q102",
"format": "json",
"model": "gpt-4o-mini",
"expected": "32",
"actual": "27",
"correct": false,
"inputTokens": 9739,
"outputTokens": 2,
"latencyMs": 1389
},
{
"questionId": "q102",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"correct": false,
"inputTokens": 11904,
"outputTokens": 5,
"latencyMs": 1215
},
{
"questionId": "q102",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "32",
"actual": "24",
"correct": false,
"inputTokens": 6013,
"outputTokens": 2,
"latencyMs": 1034
},
{
"questionId": "q102",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "26",
"correct": false,
"inputTokens": 6990,
"outputTokens": 5,
"latencyMs": 1063
},
{
"questionId": "q102",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "32",
"actual": "25",
"correct": false,
"inputTokens": 6781,
"outputTokens": 2,
"latencyMs": 7312
},
{
"questionId": "q102",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"correct": false,
"inputTokens": 8411,
"outputTokens": 5,
"latencyMs": 1387
},
{
"questionId": "q102",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "32",
"actual": "27",
"correct": false,
"inputTokens": 9158,
"outputTokens": 2,
"latencyMs": 1488
},
{
"questionId": "q102",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "28",
"correct": false,
"inputTokens": 9286,
"outputTokens": 5,
"latencyMs": 1268
},
{
"questionId": "q102",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "32",
"actual": "27",
"correct": false,
"inputTokens": 7373,
"outputTokens": 2,
"latencyMs": 1274
},
{
"questionId": "q102",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "32",
"actual": "26",
"correct": false,
"inputTokens": 8382,
"outputTokens": 5,
"latencyMs": 1354
},
{
"questionId": "q103",
"format": "json",
"model": "gpt-4o-mini",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 1330
},
{
"questionId": "q103",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1437
},
{
"questionId": "q103",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 1341
},
{
"questionId": "q103",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1231
},
{
"questionId": "q103",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 2515
},
{
"questionId": "q103",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1162
},
{
"questionId": "q103",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 868
},
{
"questionId": "q103",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1149
},
{
"questionId": "q103",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 1183
},
{
"questionId": "q103",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6975",
"actual": "6975",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1119
},
{
"questionId": "q104",
"format": "json",
"model": "gpt-4o-mini",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 1273
},
{
"questionId": "q104",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1371
},
{
"questionId": "q104",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 2052
},
{
"questionId": "q104",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 997
},
{
"questionId": "q104",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 1152
},
{
"questionId": "q104",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1188
},
{
"questionId": "q104",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 1259
},
{
"questionId": "q104",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1239
},
{
"questionId": "q104",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 1096
},
{
"questionId": "q104",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6686.23",
"actual": "6686.23",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1247
},
{
"questionId": "q105",
"format": "json",
"model": "gpt-4o-mini",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 1354
},
{
"questionId": "q105",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1083
},
{
"questionId": "q105",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 869
},
{
"questionId": "q105",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1051
},
{
"questionId": "q105",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 1528
},
{
"questionId": "q105",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1126
},
{
"questionId": "q105",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 1136
},
{
"questionId": "q105",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1121
},
{
"questionId": "q105",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 1217
},
{
"questionId": "q105",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "7500",
"actual": "7500",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1099
},
{
"questionId": "q106",
"format": "json",
"model": "gpt-4o-mini",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 1416
},
{
"questionId": "q106",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1526
},
{
"questionId": "q106",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 1350
},
{
"questionId": "q106",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1330
},
{
"questionId": "q106",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 2337
},
{
"questionId": "q106",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1171
},
{
"questionId": "q106",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 3128
},
{
"questionId": "q106",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1151
},
{
"questionId": "q106",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 1988
},
{
"questionId": "q106",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "14297.05",
"actual": "14297.05",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1166
},
{
"questionId": "q107",
"format": "json",
"model": "gpt-4o-mini",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 2217
},
{
"questionId": "q107",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1114
},
{
"questionId": "q107",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 1360
},
{
"questionId": "q107",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1079
},
{
"questionId": "q107",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 1951
},
{
"questionId": "q107",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1173
},
{
"questionId": "q107",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 1076
},
{
"questionId": "q107",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1098
},
{
"questionId": "q107",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 1101
},
{
"questionId": "q107",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6692",
"actual": "6692",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1254
},
{
"questionId": "q108",
"format": "json",
"model": "gpt-4o-mini",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 2041
},
{
"questionId": "q108",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1405
},
{
"questionId": "q108",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 1170
},
{
"questionId": "q108",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1161
},
{
"questionId": "q108",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 1326
},
{
"questionId": "q108",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1259
},
{
"questionId": "q108",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 3006
},
{
"questionId": "q108",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1461
},
{
"questionId": "q108",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 3824
},
{
"questionId": "q108",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "9302.76",
"actual": "9302.76",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1391
},
{
"questionId": "q109",
"format": "json",
"model": "gpt-4o-mini",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 1091
},
{
"questionId": "q109",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1188
},
{
"questionId": "q109",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 1450
},
{
"questionId": "q109",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1614
},
{
"questionId": "q109",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 1642
},
{
"questionId": "q109",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1311
},
{
"questionId": "q109",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 1201
},
{
"questionId": "q109",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1261
},
{
"questionId": "q109",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 856
},
{
"questionId": "q109",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3285",
"actual": "3285",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 980
},
{
"questionId": "q110",
"format": "json",
"model": "gpt-4o-mini",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 3090
},
{
"questionId": "q110",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1123
},
{
"questionId": "q110",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 2911
},
{
"questionId": "q110",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 979
},
{
"questionId": "q110",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 1118
},
{
"questionId": "q110",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 943
},
{
"questionId": "q110",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 2639
},
{
"questionId": "q110",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1187
},
{
"questionId": "q110",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 2402
},
{
"questionId": "q110",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3826.93",
"actual": "3826.93",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1723
},
{
"questionId": "q111",
"format": "json",
"model": "gpt-4o-mini",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 2401
},
{
"questionId": "q111",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1117
},
{
"questionId": "q111",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 1568
},
{
"questionId": "q111",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1132
},
{
"questionId": "q111",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 1478
},
{
"questionId": "q111",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1831
},
{
"questionId": "q111",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 1631
},
{
"questionId": "q111",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1371
},
{
"questionId": "q111",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 1209
},
{
"questionId": "q111",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6191",
"actual": "6191",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1411
},
{
"questionId": "q112",
"format": "json",
"model": "gpt-4o-mini",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 1773
},
{
"questionId": "q112",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1090
},
{
"questionId": "q112",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 1354
},
{
"questionId": "q112",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1095
},
{
"questionId": "q112",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 1135
},
{
"questionId": "q112",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 976
},
{
"questionId": "q112",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 1311
},
{
"questionId": "q112",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1287
},
{
"questionId": "q112",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 1288
},
{
"questionId": "q112",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1854.66",
"actual": "1854.66",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1157
},
{
"questionId": "q113",
"format": "json",
"model": "gpt-4o-mini",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 1328
},
{
"questionId": "q113",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1068
},
{
"questionId": "q113",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 1020
},
{
"questionId": "q113",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1069
},
{
"questionId": "q113",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 968
},
{
"questionId": "q113",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1436
},
{
"questionId": "q113",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 1171
},
{
"questionId": "q113",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1273
},
{
"questionId": "q113",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 1788
},
{
"questionId": "q113",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4696",
"actual": "4696",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1050
},
{
"questionId": "q114",
"format": "json",
"model": "gpt-4o-mini",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 1414
},
{
"questionId": "q114",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1192
},
{
"questionId": "q114",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 893
},
{
"questionId": "q114",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1065
},
{
"questionId": "q114",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 1155
},
{
"questionId": "q114",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1842
},
{
"questionId": "q114",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 2740
},
{
"questionId": "q114",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1295
},
{
"questionId": "q114",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 1053
},
{
"questionId": "q114",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4211.6",
"actual": "4211.6",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1118
},
{
"questionId": "q115",
"format": "json",
"model": "gpt-4o-mini",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 1452
},
{
"questionId": "q115",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1272
},
{
"questionId": "q115",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 1039
},
{
"questionId": "q115",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1155
},
{
"questionId": "q115",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 796
},
{
"questionId": "q115",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1048
},
{
"questionId": "q115",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 2282
},
{
"questionId": "q115",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1592
},
{
"questionId": "q115",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 2691
},
{
"questionId": "q115",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6196",
"actual": "6196",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1126
},
{
"questionId": "q116",
"format": "json",
"model": "gpt-4o-mini",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 1288
},
{
"questionId": "q116",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.30",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 991
},
{
"questionId": "q116",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 1257
},
{
"questionId": "q116",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1004
},
{
"questionId": "q116",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 1620
},
{
"questionId": "q116",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 991
},
{
"questionId": "q116",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 1048
},
{
"questionId": "q116",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1189
},
{
"questionId": "q116",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 3282
},
{
"questionId": "q116",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6105.3",
"actual": "6105.3",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 985
},
{
"questionId": "q117",
"format": "json",
"model": "gpt-4o-mini",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 871
},
{
"questionId": "q117",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1042
},
{
"questionId": "q117",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 999
},
{
"questionId": "q117",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1111
},
{
"questionId": "q117",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 1132
},
{
"questionId": "q117",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1004
},
{
"questionId": "q117",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 1162
},
{
"questionId": "q117",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1271
},
{
"questionId": "q117",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 961
},
{
"questionId": "q117",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6528",
"actual": "6528",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1289
},
{
"questionId": "q118",
"format": "json",
"model": "gpt-4o-mini",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 1634
},
{
"questionId": "q118",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1198
},
{
"questionId": "q118",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 2678
},
{
"questionId": "q118",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1155
},
{
"questionId": "q118",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 1104
},
{
"questionId": "q118",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1109
},
{
"questionId": "q118",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 3756
},
{
"questionId": "q118",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1082
},
{
"questionId": "q118",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 1451
},
{
"questionId": "q118",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1136.09",
"actual": "1136.09",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1730
},
{
"questionId": "q119",
"format": "json",
"model": "gpt-4o-mini",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 1327
},
{
"questionId": "q119",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 1282
},
{
"questionId": "q119",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 1368
},
{
"questionId": "q119",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1487
},
{
"questionId": "q119",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 2752
},
{
"questionId": "q119",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 909
},
{
"questionId": "q119",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 3502
},
{
"questionId": "q119",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1212
},
{
"questionId": "q119",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 1218
},
{
"questionId": "q119",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "4689",
"actual": "4689",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1064
},
{
"questionId": "q120",
"format": "json",
"model": "gpt-4o-mini",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 2777
},
{
"questionId": "q120",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1246
},
{
"questionId": "q120",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 1424
},
{
"questionId": "q120",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1074
},
{
"questionId": "q120",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 2803
},
{
"questionId": "q120",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1107
},
{
"questionId": "q120",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 1066
},
{
"questionId": "q120",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1325
},
{
"questionId": "q120",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 1330
},
{
"questionId": "q120",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "2637.73",
"actual": "2637.73",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 1192
},
{
"questionId": "q121",
"format": "json",
"model": "gpt-4o-mini",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 3713,
"outputTokens": 3,
"latencyMs": 1139
},
{
"questionId": "q121",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 4080,
"outputTokens": 6,
"latencyMs": 994
},
{
"questionId": "q121",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 1564,
"outputTokens": 3,
"latencyMs": 1309
},
{
"questionId": "q121",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 1509,
"outputTokens": 6,
"latencyMs": 1184
},
{
"questionId": "q121",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 1442,
"outputTokens": 3,
"latencyMs": 1182
},
{
"questionId": "q121",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 1445,
"outputTokens": 6,
"latencyMs": 1381
},
{
"questionId": "q121",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 3830,
"outputTokens": 3,
"latencyMs": 1103
},
{
"questionId": "q121",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 3415,
"outputTokens": 6,
"latencyMs": 1220
},
{
"questionId": "q121",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 2986,
"outputTokens": 3,
"latencyMs": 1169
},
{
"questionId": "q121",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "5685",
"actual": "5685",
"correct": true,
"inputTokens": 3110,
"outputTokens": 6,
"latencyMs": 1208
},
{
"questionId": "q122",
"format": "json",
"model": "gpt-4o-mini",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 3712,
"outputTokens": 5,
"latencyMs": 1037
},
{
"questionId": "q122",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 4079,
"outputTokens": 8,
"latencyMs": 1278
},
{
"questionId": "q122",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 1563,
"outputTokens": 5,
"latencyMs": 1441
},
{
"questionId": "q122",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 1508,
"outputTokens": 8,
"latencyMs": 1204
},
{
"questionId": "q122",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 1441,
"outputTokens": 5,
"latencyMs": 1782
},
{
"questionId": "q122",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 1444,
"outputTokens": 8,
"latencyMs": 1088
},
{
"questionId": "q122",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 3829,
"outputTokens": 5,
"latencyMs": 1447
},
{
"questionId": "q122",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 3414,
"outputTokens": 8,
"latencyMs": 1356
},
{
"questionId": "q122",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 2985,
"outputTokens": 5,
"latencyMs": 1309
},
{
"questionId": "q122",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "3421.06",
"actual": "3421.06",
"correct": true,
"inputTokens": 3109,
"outputTokens": 8,
"latencyMs": 995
},
{
"questionId": "q123",
"format": "json",
"model": "gpt-4o-mini",
"expected": "344498",
"actual": "188,000",
"correct": false,
"inputTokens": 3710,
"outputTokens": 4,
"latencyMs": 1405
},
{
"questionId": "q123",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "188,945",
"correct": false,
"inputTokens": 4077,
"outputTokens": 7,
"latencyMs": 1110
},
{
"questionId": "q123",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "344498",
"actual": "186,000",
"correct": false,
"inputTokens": 1561,
"outputTokens": 4,
"latencyMs": 1306
},
{
"questionId": "q123",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "337,045",
"correct": false,
"inputTokens": 1506,
"outputTokens": 7,
"latencyMs": 1292
},
{
"questionId": "q123",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "344498",
"actual": "188,000",
"correct": false,
"inputTokens": 1439,
"outputTokens": 4,
"latencyMs": 2659
},
{
"questionId": "q123",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "372,915",
"correct": false,
"inputTokens": 1442,
"outputTokens": 7,
"latencyMs": 966
},
{
"questionId": "q123",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "344498",
"actual": "174,000",
"correct": false,
"inputTokens": 3827,
"outputTokens": 4,
"latencyMs": 1177
},
{
"questionId": "q123",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "188,647",
"correct": false,
"inputTokens": 3412,
"outputTokens": 7,
"latencyMs": 1018
},
{
"questionId": "q123",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "344498",
"actual": "188,000",
"correct": false,
"inputTokens": 2983,
"outputTokens": 4,
"latencyMs": 1659
},
{
"questionId": "q123",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "344498",
"actual": "181,854",
"correct": false,
"inputTokens": 3107,
"outputTokens": 7,
"latencyMs": 1894
},
{
"questionId": "q124",
"format": "json",
"model": "gpt-4o-mini",
"expected": "312818.50",
"actual": "188,174.36",
"correct": false,
"inputTokens": 3708,
"outputTokens": 6,
"latencyMs": 2900
},
{
"questionId": "q124",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "287,745.89",
"correct": false,
"inputTokens": 4075,
"outputTokens": 9,
"latencyMs": 1196
},
{
"questionId": "q124",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "312818.50",
"actual": "Total revenue across all dates is 139,155.36.",
"correct": false,
"inputTokens": 1559,
"outputTokens": 14,
"latencyMs": 1401
},
{
"questionId": "q124",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "487,891.45",
"correct": false,
"inputTokens": 1504,
"outputTokens": 9,
"latencyMs": 1118
},
{
"questionId": "q124",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "312818.50",
"actual": "Total revenue across all dates is 155,000.00.",
"correct": false,
"inputTokens": 1437,
"outputTokens": 14,
"latencyMs": 1308
},
{
"questionId": "q124",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "487,891.89",
"correct": false,
"inputTokens": 1440,
"outputTokens": 9,
"latencyMs": 1120
},
{
"questionId": "q124",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "312818.50",
"actual": "Total revenue across all dates is 155,155.36.",
"correct": false,
"inputTokens": 3825,
"outputTokens": 14,
"latencyMs": 1143
},
{
"questionId": "q124",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "381,968.89",
"correct": false,
"inputTokens": 3410,
"outputTokens": 9,
"latencyMs": 1172
},
{
"questionId": "q124",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "312818.50",
"actual": "Total revenue across all dates is 155,155.36.",
"correct": false,
"inputTokens": 2981,
"outputTokens": 14,
"latencyMs": 1179
},
{
"questionId": "q124",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "312818.50",
"actual": "381,847.89",
"correct": false,
"inputTokens": 3105,
"outputTokens": 9,
"latencyMs": 1073
},
{
"questionId": "q125",
"format": "json",
"model": "gpt-4o-mini",
"expected": "1811",
"actual": "1030",
"correct": false,
"inputTokens": 3710,
"outputTokens": 3,
"latencyMs": 3823
},
{
"questionId": "q125",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,234",
"correct": false,
"inputTokens": 4078,
"outputTokens": 7,
"latencyMs": 1153
},
{
"questionId": "q125",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "1811",
"actual": "1040",
"correct": false,
"inputTokens": 1561,
"outputTokens": 3,
"latencyMs": 1472
},
{
"questionId": "q125",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,945",
"correct": false,
"inputTokens": 1507,
"outputTokens": 7,
"latencyMs": 940
},
{
"questionId": "q125",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "1811",
"actual": "1030",
"correct": false,
"inputTokens": 1439,
"outputTokens": 3,
"latencyMs": 1067
},
{
"questionId": "q125",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,945",
"correct": false,
"inputTokens": 1443,
"outputTokens": 7,
"latencyMs": 1183
},
{
"questionId": "q125",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "1811",
"actual": "Total conversions: 1030",
"correct": false,
"inputTokens": 3827,
"outputTokens": 7,
"latencyMs": 1103
},
{
"questionId": "q125",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,454",
"correct": false,
"inputTokens": 3413,
"outputTokens": 7,
"latencyMs": 1067
},
{
"questionId": "q125",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "1811",
"actual": "1040",
"correct": false,
"inputTokens": 2983,
"outputTokens": 3,
"latencyMs": 932
},
{
"questionId": "q125",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "1811",
"actual": "1,454",
"correct": false,
"inputTokens": 3108,
"outputTokens": 7,
"latencyMs": 1530
},
{
"questionId": "q126",
"format": "json",
"model": "gpt-4o-mini",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 3710,
"outputTokens": 2,
"latencyMs": 1016
},
{
"questionId": "q126",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1440
},
{
"questionId": "q126",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "42",
"actual": "24",
"correct": false,
"inputTokens": 1561,
"outputTokens": 2,
"latencyMs": 1206
},
{
"questionId": "q126",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1452
},
{
"questionId": "q126",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "42",
"actual": "22",
"correct": false,
"inputTokens": 1439,
"outputTokens": 2,
"latencyMs": 1249
},
{
"questionId": "q126",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1248
},
{
"questionId": "q126",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "42",
"actual": "20",
"correct": false,
"inputTokens": 3827,
"outputTokens": 2,
"latencyMs": 1420
},
{
"questionId": "q126",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "47",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 900
},
{
"questionId": "q126",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "42",
"actual": "42",
"correct": true,
"inputTokens": 2983,
"outputTokens": 2,
"latencyMs": 1309
},
{
"questionId": "q126",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "42",
"actual": "47",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1216
},
{
"questionId": "q127",
"format": "json",
"model": "gpt-4o-mini",
"expected": "28",
"actual": "38",
"correct": false,
"inputTokens": 3710,
"outputTokens": 2,
"latencyMs": 3911
},
{
"questionId": "q127",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "24",
"correct": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1056
},
{
"questionId": "q127",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "28",
"actual": "20",
"correct": false,
"inputTokens": 1561,
"outputTokens": 2,
"latencyMs": 839
},
{
"questionId": "q127",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "26",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 965
},
{
"questionId": "q127",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "28",
"actual": "20",
"correct": false,
"inputTokens": 1439,
"outputTokens": 2,
"latencyMs": 2163
},
{
"questionId": "q127",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "23",
"correct": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1006
},
{
"questionId": "q127",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "28",
"actual": "18",
"correct": false,
"inputTokens": 3827,
"outputTokens": 2,
"latencyMs": 2619
},
{
"questionId": "q127",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "24",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 989
},
{
"questionId": "q127",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "28",
"actual": "22",
"correct": false,
"inputTokens": 2983,
"outputTokens": 2,
"latencyMs": 1830
},
{
"questionId": "q127",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "28",
"actual": "23",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1001
},
{
"questionId": "q128",
"format": "json",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "15",
"correct": false,
"inputTokens": 3710,
"outputTokens": 2,
"latencyMs": 1217
},
{
"questionId": "q128",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 3180
},
{
"questionId": "q128",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "15",
"correct": false,
"inputTokens": 1561,
"outputTokens": 2,
"latencyMs": 1076
},
{
"questionId": "q128",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "12",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 912
},
{
"questionId": "q128",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "15",
"correct": false,
"inputTokens": 1439,
"outputTokens": 2,
"latencyMs": 2900
},
{
"questionId": "q128",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1389
},
{
"questionId": "q128",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "12",
"correct": false,
"inputTokens": 3827,
"outputTokens": 2,
"latencyMs": 1107
},
{
"questionId": "q128",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1150
},
{
"questionId": "q128",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "11",
"actual": "18",
"correct": false,
"inputTokens": 2983,
"outputTokens": 2,
"latencyMs": 1047
},
{
"questionId": "q128",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11",
"actual": "11",
"correct": true,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1169
},
{
"questionId": "q129",
"format": "json",
"model": "gpt-4o-mini",
"expected": "58",
"actual": "36",
"correct": false,
"inputTokens": 3709,
"outputTokens": 2,
"latencyMs": 1007
},
{
"questionId": "q129",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "50",
"correct": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1342
},
{
"questionId": "q129",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "58",
"actual": "24",
"correct": false,
"inputTokens": 1560,
"outputTokens": 2,
"latencyMs": 828
},
{
"questionId": "q129",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "47",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1305
},
{
"questionId": "q129",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "58",
"actual": "15",
"correct": false,
"inputTokens": 1438,
"outputTokens": 2,
"latencyMs": 1305
},
{
"questionId": "q129",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "54",
"correct": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1406
},
{
"questionId": "q129",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "58",
"actual": "18",
"correct": false,
"inputTokens": 3826,
"outputTokens": 2,
"latencyMs": 1513
},
{
"questionId": "q129",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "47",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1026
},
{
"questionId": "q129",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "58",
"actual": "42",
"correct": false,
"inputTokens": 2982,
"outputTokens": 2,
"latencyMs": 1373
},
{
"questionId": "q129",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "58",
"actual": "54",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1112
},
{
"questionId": "q130",
"format": "json",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "34",
"correct": false,
"inputTokens": 3709,
"outputTokens": 2,
"latencyMs": 1248
},
{
"questionId": "q130",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1083
},
{
"questionId": "q130",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "24",
"correct": false,
"inputTokens": 1560,
"outputTokens": 2,
"latencyMs": 895
},
{
"questionId": "q130",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "38",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 1087
},
{
"questionId": "q130",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "18",
"correct": false,
"inputTokens": 1438,
"outputTokens": 2,
"latencyMs": 1157
},
{
"questionId": "q130",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "38",
"correct": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 1155
},
{
"questionId": "q130",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "18",
"correct": false,
"inputTokens": 3826,
"outputTokens": 2,
"latencyMs": 1959
},
{
"questionId": "q130",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1110
},
{
"questionId": "q130",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "41",
"actual": "34",
"correct": false,
"inputTokens": 2982,
"outputTokens": 2,
"latencyMs": 4540
},
{
"questionId": "q130",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "41",
"actual": "31",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1286
},
{
"questionId": "q131",
"format": "json",
"model": "gpt-4o-mini",
"expected": "23",
"actual": "18",
"correct": false,
"inputTokens": 3709,
"outputTokens": 2,
"latencyMs": 1059
},
{
"questionId": "q131",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"correct": false,
"inputTokens": 4078,
"outputTokens": 5,
"latencyMs": 1302
},
{
"questionId": "q131",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "23",
"actual": "18",
"correct": false,
"inputTokens": 1560,
"outputTokens": 2,
"latencyMs": 1019
},
{
"questionId": "q131",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"correct": false,
"inputTokens": 1507,
"outputTokens": 5,
"latencyMs": 975
},
{
"questionId": "q131",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "23",
"actual": "18",
"correct": false,
"inputTokens": 1438,
"outputTokens": 2,
"latencyMs": 1056
},
{
"questionId": "q131",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "20",
"correct": false,
"inputTokens": 1443,
"outputTokens": 5,
"latencyMs": 984
},
{
"questionId": "q131",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "23",
"actual": "15",
"correct": false,
"inputTokens": 3826,
"outputTokens": 2,
"latencyMs": 1420
},
{
"questionId": "q131",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "21",
"correct": false,
"inputTokens": 3413,
"outputTokens": 5,
"latencyMs": 1139
},
{
"questionId": "q131",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "23",
"actual": "18",
"correct": false,
"inputTokens": 2982,
"outputTokens": 2,
"latencyMs": 1097
},
{
"questionId": "q131",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "23",
"actual": "21",
"correct": false,
"inputTokens": 3108,
"outputTokens": 5,
"latencyMs": 1203
},
{
"questionId": "q132",
"format": "json",
"model": "gpt-4o-mini",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 15188,
"outputTokens": 3,
"latencyMs": 2257
},
{
"questionId": "q132",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 17409,
"outputTokens": 6,
"latencyMs": 1292
},
{
"questionId": "q132",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 8789,
"outputTokens": 3,
"latencyMs": 1877
},
{
"questionId": "q132",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 9279,
"outputTokens": 6,
"latencyMs": 1118
},
{
"questionId": "q132",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 8557,
"outputTokens": 3,
"latencyMs": 4023
},
{
"questionId": "q132",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 9125,
"outputTokens": 6,
"latencyMs": 1134
},
{
"questionId": "q132",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 15482,
"outputTokens": 3,
"latencyMs": 5304
},
{
"questionId": "q132",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 15367,
"outputTokens": 6,
"latencyMs": 1442
},
{
"questionId": "q132",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 13172,
"outputTokens": 3,
"latencyMs": 2157
},
{
"questionId": "q132",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "430828",
"actual": "430828",
"correct": true,
"inputTokens": 14483,
"outputTokens": 6,
"latencyMs": 1483
},
{
"questionId": "q133",
"format": "json",
"model": "gpt-4o-mini",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 15190,
"outputTokens": 3,
"latencyMs": 2084
},
{
"questionId": "q133",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 17410,
"outputTokens": 6,
"latencyMs": 2592
},
{
"questionId": "q133",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 8791,
"outputTokens": 3,
"latencyMs": 1208
},
{
"questionId": "q133",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 9280,
"outputTokens": 6,
"latencyMs": 1261
},
{
"questionId": "q133",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 8559,
"outputTokens": 3,
"latencyMs": 1697
},
{
"questionId": "q133",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 9126,
"outputTokens": 6,
"latencyMs": 1171
},
{
"questionId": "q133",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 15484,
"outputTokens": 3,
"latencyMs": 1704
},
{
"questionId": "q133",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 15368,
"outputTokens": 6,
"latencyMs": 1637
},
{
"questionId": "q133",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 13174,
"outputTokens": 3,
"latencyMs": 1599
},
{
"questionId": "q133",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11798",
"actual": "11798",
"correct": true,
"inputTokens": 14484,
"outputTokens": 6,
"latencyMs": 1505
},
{
"questionId": "q134",
"format": "json",
"model": "gpt-4o-mini",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 15193,
"outputTokens": 3,
"latencyMs": 2340
},
{
"questionId": "q134",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1380
},
{
"questionId": "q134",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 8794,
"outputTokens": 3,
"latencyMs": 1631
},
{
"questionId": "q134",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 1271
},
{
"questionId": "q134",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 8562,
"outputTokens": 3,
"latencyMs": 1620
},
{
"questionId": "q134",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1279
},
{
"questionId": "q134",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 15487,
"outputTokens": 3,
"latencyMs": 14565
},
{
"questionId": "q134",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 15370,
"outputTokens": 6,
"latencyMs": 1559
},
{
"questionId": "q134",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 13177,
"outputTokens": 3,
"latencyMs": 1600
},
{
"questionId": "q134",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "183631",
"actual": "183631",
"correct": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1179
},
{
"questionId": "q135",
"format": "json",
"model": "gpt-4o-mini",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 15192,
"outputTokens": 3,
"latencyMs": 2508
},
{
"questionId": "q135",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1359
},
{
"questionId": "q135",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 8793,
"outputTokens": 3,
"latencyMs": 1188
},
{
"questionId": "q135",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 1204
},
{
"questionId": "q135",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 8561,
"outputTokens": 3,
"latencyMs": 2448
},
{
"questionId": "q135",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1311
},
{
"questionId": "q135",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 15486,
"outputTokens": 3,
"latencyMs": 2442
},
{
"questionId": "q135",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 15370,
"outputTokens": 6,
"latencyMs": 1414
},
{
"questionId": "q135",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 13176,
"outputTokens": 3,
"latencyMs": 2254
},
{
"questionId": "q135",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "29246",
"actual": "29246",
"correct": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1512
},
{
"questionId": "q136",
"format": "json",
"model": "gpt-4o-mini",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 15188,
"outputTokens": 3,
"latencyMs": 1565
},
{
"questionId": "q136",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 1871
},
{
"questionId": "q136",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 8789,
"outputTokens": 3,
"latencyMs": 1963
},
{
"questionId": "q136",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1533
},
{
"questionId": "q136",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 8557,
"outputTokens": 3,
"latencyMs": 1561
},
{
"questionId": "q136",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 1200
},
{
"questionId": "q136",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 15482,
"outputTokens": 3,
"latencyMs": 1657
},
{
"questionId": "q136",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 15365,
"outputTokens": 6,
"latencyMs": 1582
},
{
"questionId": "q136",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 13172,
"outputTokens": 3,
"latencyMs": 3402
},
{
"questionId": "q136",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "135306",
"actual": "135306",
"correct": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1251
},
{
"questionId": "q137",
"format": "json",
"model": "gpt-4o-mini",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 15187,
"outputTokens": 3,
"latencyMs": 2019
},
{
"questionId": "q137",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 1517
},
{
"questionId": "q137",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "24914",
"actual": "The repository undefined/react-native does not exist in the provided data.",
"correct": false,
"inputTokens": 8788,
"outputTokens": 14,
"latencyMs": 1737
},
{
"questionId": "q137",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1467
},
{
"questionId": "q137",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 8556,
"outputTokens": 3,
"latencyMs": 3442
},
{
"questionId": "q137",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1300
},
{
"questionId": "q137",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 15481,
"outputTokens": 3,
"latencyMs": 1825
},
{
"questionId": "q137",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 15366,
"outputTokens": 6,
"latencyMs": 1443
},
{
"questionId": "q137",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "24914",
"actual": "124320",
"correct": false,
"inputTokens": 13171,
"outputTokens": 3,
"latencyMs": 1783
},
{
"questionId": "q137",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "24914",
"actual": "24914",
"correct": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1362
},
{
"questionId": "q138",
"format": "json",
"model": "gpt-4o-mini",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 15187,
"outputTokens": 3,
"latencyMs": 1824
},
{
"questionId": "q138",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 1479
},
{
"questionId": "q138",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "111683",
"actual": "108017",
"correct": false,
"inputTokens": 8788,
"outputTokens": 3,
"latencyMs": 3315
},
{
"questionId": "q138",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1270
},
{
"questionId": "q138",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 8556,
"outputTokens": 3,
"latencyMs": 1384
},
{
"questionId": "q138",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 1252
},
{
"questionId": "q138",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 15481,
"outputTokens": 3,
"latencyMs": 3048
},
{
"questionId": "q138",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 15365,
"outputTokens": 6,
"latencyMs": 1381
},
{
"questionId": "q138",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 13171,
"outputTokens": 3,
"latencyMs": 3804
},
{
"questionId": "q138",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "111683",
"actual": "111683",
"correct": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1498
},
{
"questionId": "q139",
"format": "json",
"model": "gpt-4o-mini",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 15194,
"outputTokens": 3,
"latencyMs": 1726
},
{
"questionId": "q139",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 17412,
"outputTokens": 6,
"latencyMs": 1526
},
{
"questionId": "q139",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 8795,
"outputTokens": 3,
"latencyMs": 1685
},
{
"questionId": "q139",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 9282,
"outputTokens": 6,
"latencyMs": 1140
},
{
"questionId": "q139",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "13364",
"actual": "0",
"correct": false,
"inputTokens": 8563,
"outputTokens": 2,
"latencyMs": 1933
},
{
"questionId": "q139",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 9128,
"outputTokens": 6,
"latencyMs": 1157
},
{
"questionId": "q139",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 15488,
"outputTokens": 3,
"latencyMs": 1249
},
{
"questionId": "q139",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 15370,
"outputTokens": 6,
"latencyMs": 1347
},
{
"questionId": "q139",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 13178,
"outputTokens": 3,
"latencyMs": 2174
},
{
"questionId": "q139",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "13364",
"actual": "13364",
"correct": true,
"inputTokens": 14486,
"outputTokens": 6,
"latencyMs": 1197
},
{
"questionId": "q140",
"format": "json",
"model": "gpt-4o-mini",
"expected": "98464",
"actual": "0",
"correct": false,
"inputTokens": 15186,
"outputTokens": 2,
"latencyMs": 3252
},
{
"questionId": "q140",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1667
},
{
"questionId": "q140",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "98464",
"actual": "0",
"correct": false,
"inputTokens": 8787,
"outputTokens": 2,
"latencyMs": 1192
},
{
"questionId": "q140",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1113
},
{
"questionId": "q140",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "98464",
"actual": "0",
"correct": false,
"inputTokens": 8555,
"outputTokens": 2,
"latencyMs": 2198
},
{
"questionId": "q140",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1187
},
{
"questionId": "q140",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "98464",
"actual": "0",
"correct": false,
"inputTokens": 15480,
"outputTokens": 2,
"latencyMs": 8573
},
{
"questionId": "q140",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 15363,
"outputTokens": 6,
"latencyMs": 1311
},
{
"questionId": "q140",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "98464",
"actual": "0",
"correct": false,
"inputTokens": 13170,
"outputTokens": 2,
"latencyMs": 3471
},
{
"questionId": "q140",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "98464",
"actual": "98464",
"correct": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1457
},
{
"questionId": "q141",
"format": "json",
"model": "gpt-4o-mini",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 15188,
"outputTokens": 3,
"latencyMs": 1363
},
{
"questionId": "q141",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 1803
},
{
"questionId": "q141",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 8789,
"outputTokens": 3,
"latencyMs": 3696
},
{
"questionId": "q141",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1391
},
{
"questionId": "q141",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "6378",
"actual": "93731",
"correct": false,
"inputTokens": 8557,
"outputTokens": 3,
"latencyMs": 7861
},
{
"questionId": "q141",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1420
},
{
"questionId": "q141",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 15482,
"outputTokens": 3,
"latencyMs": 1769
},
{
"questionId": "q141",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 15366,
"outputTokens": 6,
"latencyMs": 1233
},
{
"questionId": "q141",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "6378",
"actual": "93731",
"correct": false,
"inputTokens": 13172,
"outputTokens": 3,
"latencyMs": 1831
},
{
"questionId": "q141",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "6378",
"actual": "6378",
"correct": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1507
},
{
"questionId": "q142",
"format": "json",
"model": "gpt-4o-mini",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 15190,
"outputTokens": 3,
"latencyMs": 10752
},
{
"questionId": "q142",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 17409,
"outputTokens": 6,
"latencyMs": 1672
},
{
"questionId": "q142",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 8791,
"outputTokens": 3,
"latencyMs": 1788
},
{
"questionId": "q142",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 9279,
"outputTokens": 6,
"latencyMs": 1633
},
{
"questionId": "q142",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 8559,
"outputTokens": 3,
"latencyMs": 1365
},
{
"questionId": "q142",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 9125,
"outputTokens": 6,
"latencyMs": 1242
},
{
"questionId": "q142",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 15484,
"outputTokens": 3,
"latencyMs": 2237
},
{
"questionId": "q142",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 15367,
"outputTokens": 6,
"latencyMs": 1275
},
{
"questionId": "q142",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 13174,
"outputTokens": 3,
"latencyMs": 3028
},
{
"questionId": "q142",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "254916",
"actual": "254916",
"correct": true,
"inputTokens": 14483,
"outputTokens": 6,
"latencyMs": 1615
},
{
"questionId": "q143",
"format": "json",
"model": "gpt-4o-mini",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 15188,
"outputTokens": 3,
"latencyMs": 1972
},
{
"questionId": "q143",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 17410,
"outputTokens": 6,
"latencyMs": 2308
},
{
"questionId": "q143",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 8789,
"outputTokens": 3,
"latencyMs": 1361
},
{
"questionId": "q143",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 9280,
"outputTokens": 6,
"latencyMs": 1162
},
{
"questionId": "q143",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 8557,
"outputTokens": 3,
"latencyMs": 2196
},
{
"questionId": "q143",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 9126,
"outputTokens": 6,
"latencyMs": 1199
},
{
"questionId": "q143",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 15482,
"outputTokens": 3,
"latencyMs": 1758
},
{
"questionId": "q143",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 15368,
"outputTokens": 6,
"latencyMs": 1340
},
{
"questionId": "q143",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 13172,
"outputTokens": 3,
"latencyMs": 2122
},
{
"questionId": "q143",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "32413",
"actual": "32413",
"correct": true,
"inputTokens": 14484,
"outputTokens": 6,
"latencyMs": 1156
},
{
"questionId": "q144",
"format": "json",
"model": "gpt-4o-mini",
"expected": "240059",
"actual": "0",
"correct": false,
"inputTokens": 15186,
"outputTokens": 2,
"latencyMs": 1208
},
{
"questionId": "q144",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1826
},
{
"questionId": "q144",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "240059",
"actual": "undefined",
"correct": false,
"inputTokens": 8787,
"outputTokens": 2,
"latencyMs": 2224
},
{
"questionId": "q144",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1220
},
{
"questionId": "q144",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "240059",
"actual": "undefined",
"correct": false,
"inputTokens": 8555,
"outputTokens": 2,
"latencyMs": 1199
},
{
"questionId": "q144",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1264
},
{
"questionId": "q144",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "240059",
"actual": "undefined/react does not exist in the provided data.",
"correct": false,
"inputTokens": 15480,
"outputTokens": 11,
"latencyMs": 3072
},
{
"questionId": "q144",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 15363,
"outputTokens": 6,
"latencyMs": 1609
},
{
"questionId": "q144",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "240059",
"actual": "undefined/react does not exist in the provided data.",
"correct": false,
"inputTokens": 13170,
"outputTokens": 11,
"latencyMs": 2608
},
{
"questionId": "q144",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "240059",
"actual": "240059",
"correct": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1237
},
{
"questionId": "q145",
"format": "json",
"model": "gpt-4o-mini",
"expected": "48986",
"actual": "0",
"correct": false,
"inputTokens": 15187,
"outputTokens": 2,
"latencyMs": 1906
},
{
"questionId": "q145",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 17406,
"outputTokens": 6,
"latencyMs": 1399
},
{
"questionId": "q145",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "48986",
"actual": "0",
"correct": false,
"inputTokens": 8788,
"outputTokens": 2,
"latencyMs": 2026
},
{
"questionId": "q145",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 9276,
"outputTokens": 6,
"latencyMs": 1318
},
{
"questionId": "q145",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "48986",
"actual": "0",
"correct": false,
"inputTokens": 8556,
"outputTokens": 2,
"latencyMs": 1605
},
{
"questionId": "q145",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 9122,
"outputTokens": 6,
"latencyMs": 1270
},
{
"questionId": "q145",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "48986",
"actual": "0",
"correct": false,
"inputTokens": 15481,
"outputTokens": 2,
"latencyMs": 5367
},
{
"questionId": "q145",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 15364,
"outputTokens": 6,
"latencyMs": 1204
},
{
"questionId": "q145",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "48986",
"actual": "The repository \"undefined/Python\" does not exist in the provided data.",
"correct": false,
"inputTokens": 13171,
"outputTokens": 16,
"latencyMs": 6329
},
{
"questionId": "q145",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "48986",
"actual": "48986",
"correct": true,
"inputTokens": 14480,
"outputTokens": 6,
"latencyMs": 1369
},
{
"questionId": "q146",
"format": "json",
"model": "gpt-4o-mini",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 15186,
"outputTokens": 3,
"latencyMs": 2063
},
{
"questionId": "q146",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1470
},
{
"questionId": "q146",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 8787,
"outputTokens": 3,
"latencyMs": 1386
},
{
"questionId": "q146",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1104
},
{
"questionId": "q146",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 8555,
"outputTokens": 3,
"latencyMs": 1747
},
{
"questionId": "q146",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1300
},
{
"questionId": "q146",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 15480,
"outputTokens": 3,
"latencyMs": 1443
},
{
"questionId": "q146",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 15363,
"outputTokens": 6,
"latencyMs": 1282
},
{
"questionId": "q146",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 13170,
"outputTokens": 3,
"latencyMs": 2185
},
{
"questionId": "q146",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "209624",
"actual": "209624",
"correct": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1407
},
{
"questionId": "q147",
"format": "json",
"model": "gpt-4o-mini",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 15186,
"outputTokens": 3,
"latencyMs": 1743
},
{
"questionId": "q147",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 17406,
"outputTokens": 6,
"latencyMs": 1564
},
{
"questionId": "q147",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 8787,
"outputTokens": 3,
"latencyMs": 1317
},
{
"questionId": "q147",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 9276,
"outputTokens": 6,
"latencyMs": 1258
},
{
"questionId": "q147",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 8555,
"outputTokens": 3,
"latencyMs": 2419
},
{
"questionId": "q147",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 9122,
"outputTokens": 6,
"latencyMs": 1171
},
{
"questionId": "q147",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "58023",
"actual": "undefined/linux does not exist in the provided data.",
"correct": false,
"inputTokens": 15480,
"outputTokens": 11,
"latencyMs": 1680
},
{
"questionId": "q147",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 15364,
"outputTokens": 6,
"latencyMs": 1396
},
{
"questionId": "q147",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "58023",
"actual": "The repository \"undefined/linux\" does not exist in the provided data.",
"correct": false,
"inputTokens": 13170,
"outputTokens": 15,
"latencyMs": 1418
},
{
"questionId": "q147",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "58023",
"actual": "58023",
"correct": true,
"inputTokens": 14480,
"outputTokens": 6,
"latencyMs": 1399
},
{
"questionId": "q148",
"format": "json",
"model": "gpt-4o-mini",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 15189,
"outputTokens": 3,
"latencyMs": 1673
},
{
"questionId": "q148",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 17407,
"outputTokens": 6,
"latencyMs": 1736
},
{
"questionId": "q148",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 8790,
"outputTokens": 3,
"latencyMs": 1754
},
{
"questionId": "q148",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 9277,
"outputTokens": 6,
"latencyMs": 1317
},
{
"questionId": "q148",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "196024",
"actual": "0",
"correct": false,
"inputTokens": 8558,
"outputTokens": 2,
"latencyMs": 3219
},
{
"questionId": "q148",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 9123,
"outputTokens": 6,
"latencyMs": 1311
},
{
"questionId": "q148",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 15483,
"outputTokens": 3,
"latencyMs": 1346
},
{
"questionId": "q148",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 15365,
"outputTokens": 6,
"latencyMs": 1560
},
{
"questionId": "q148",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 13173,
"outputTokens": 3,
"latencyMs": 1009
},
{
"questionId": "q148",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "196024",
"actual": "196024",
"correct": true,
"inputTokens": 14481,
"outputTokens": 6,
"latencyMs": 1446
},
{
"questionId": "q149",
"format": "json",
"model": "gpt-4o-mini",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 15189,
"outputTokens": 3,
"latencyMs": 3361
},
{
"questionId": "q149",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 17408,
"outputTokens": 6,
"latencyMs": 1788
},
{
"questionId": "q149",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 8790,
"outputTokens": 3,
"latencyMs": 1123
},
{
"questionId": "q149",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 9278,
"outputTokens": 6,
"latencyMs": 1235
},
{
"questionId": "q149",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 8558,
"outputTokens": 3,
"latencyMs": 1100
},
{
"questionId": "q149",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 9124,
"outputTokens": 6,
"latencyMs": 1188
},
{
"questionId": "q149",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 15483,
"outputTokens": 3,
"latencyMs": 1557
},
{
"questionId": "q149",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 15366,
"outputTokens": 6,
"latencyMs": 1352
},
{
"questionId": "q149",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 13173,
"outputTokens": 3,
"latencyMs": 1280
},
{
"questionId": "q149",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "30919",
"actual": "30919",
"correct": true,
"inputTokens": 14482,
"outputTokens": 6,
"latencyMs": 1247
},
{
"questionId": "q150",
"format": "json",
"model": "gpt-4o-mini",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 15188,
"outputTokens": 3,
"latencyMs": 1394
},
{
"questionId": "q150",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 17405,
"outputTokens": 6,
"latencyMs": 1801
},
{
"questionId": "q150",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 8789,
"outputTokens": 3,
"latencyMs": 2052
},
{
"questionId": "q150",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 9275,
"outputTokens": 6,
"latencyMs": 1176
},
{
"questionId": "q150",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 8557,
"outputTokens": 3,
"latencyMs": 2084
},
{
"questionId": "q150",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 9121,
"outputTokens": 6,
"latencyMs": 1191
},
{
"questionId": "q150",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 15482,
"outputTokens": 3,
"latencyMs": 1261
},
{
"questionId": "q150",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 15363,
"outputTokens": 6,
"latencyMs": 1355
},
{
"questionId": "q150",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 13172,
"outputTokens": 3,
"latencyMs": 3388
},
{
"questionId": "q150",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "192220",
"actual": "192220",
"correct": true,
"inputTokens": 14479,
"outputTokens": 6,
"latencyMs": 1591
},
{
"questionId": "q151",
"format": "json",
"model": "gpt-4o-mini",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 15191,
"outputTokens": 3,
"latencyMs": 1942
},
{
"questionId": "q151",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 17414,
"outputTokens": 6,
"latencyMs": 1340
},
{
"questionId": "q151",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 8792,
"outputTokens": 3,
"latencyMs": 1443
},
{
"questionId": "q151",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 9284,
"outputTokens": 6,
"latencyMs": 1732
},
{
"questionId": "q151",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 8560,
"outputTokens": 3,
"latencyMs": 1994
},
{
"questionId": "q151",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 9130,
"outputTokens": 6,
"latencyMs": 1198
},
{
"questionId": "q151",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 15485,
"outputTokens": 3,
"latencyMs": 5013
},
{
"questionId": "q151",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 15372,
"outputTokens": 6,
"latencyMs": 1463
},
{
"questionId": "q151",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 13175,
"outputTokens": 3,
"latencyMs": 1296
},
{
"questionId": "q151",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "11763",
"actual": "11763",
"correct": true,
"inputTokens": 14488,
"outputTokens": 6,
"latencyMs": 2877
},
{
"questionId": "q152",
"format": "json",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 15188,
"outputTokens": 2,
"latencyMs": 2160
},
{
"questionId": "q152",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 17406,
"outputTokens": 5,
"latencyMs": 1947
},
{
"questionId": "q152",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 8789,
"outputTokens": 2,
"latencyMs": 1222
},
{
"questionId": "q152",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 9276,
"outputTokens": 5,
"latencyMs": 1487
},
{
"questionId": "q152",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 8557,
"outputTokens": 2,
"latencyMs": 1450
},
{
"questionId": "q152",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 9122,
"outputTokens": 5,
"latencyMs": 1358
},
{
"questionId": "q152",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 15482,
"outputTokens": 2,
"latencyMs": 873
},
{
"questionId": "q152",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 15364,
"outputTokens": 5,
"latencyMs": 1500
},
{
"questionId": "q152",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 13172,
"outputTokens": 2,
"latencyMs": 7031
},
{
"questionId": "q152",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "0",
"correct": false,
"inputTokens": 14480,
"outputTokens": 5,
"latencyMs": 1916
},
{
"questionId": "q153",
"format": "json",
"model": "gpt-4o-mini",
"expected": "15404143",
"actual": "43115556",
"correct": false,
"inputTokens": 15189,
"outputTokens": 4,
"latencyMs": 3324
},
{
"questionId": "q153",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"correct": false,
"inputTokens": 17407,
"outputTokens": 9,
"latencyMs": 1607
},
{
"questionId": "q153",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "15404143",
"actual": "10419582",
"correct": false,
"inputTokens": 8790,
"outputTokens": 4,
"latencyMs": 900
},
{
"questionId": "q153",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"correct": false,
"inputTokens": 9277,
"outputTokens": 9,
"latencyMs": 1385
},
{
"questionId": "q153",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "15404143",
"actual": "10419582",
"correct": false,
"inputTokens": 8558,
"outputTokens": 4,
"latencyMs": 1922
},
{
"questionId": "q153",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "15,847,892",
"correct": false,
"inputTokens": 9123,
"outputTokens": 9,
"latencyMs": 1230
},
{
"questionId": "q153",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "15404143",
"actual": "10419580",
"correct": false,
"inputTokens": 15483,
"outputTokens": 4,
"latencyMs": 1716
},
{
"questionId": "q153",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"correct": false,
"inputTokens": 15365,
"outputTokens": 9,
"latencyMs": 1384
},
{
"questionId": "q153",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "15404143",
"actual": "Total number of stars across all repositories is 4,978,155.",
"correct": false,
"inputTokens": 13173,
"outputTokens": 16,
"latencyMs": 3411
},
{
"questionId": "q153",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "15404143",
"actual": "13,847,892",
"correct": false,
"inputTokens": 14481,
"outputTokens": 9,
"latencyMs": 1539
},
{
"questionId": "q154",
"format": "json",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "77",
"correct": false,
"inputTokens": 15189,
"outputTokens": 2,
"latencyMs": 2523
},
{
"questionId": "q154",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 1885
},
{
"questionId": "q154",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "42",
"correct": false,
"inputTokens": 8790,
"outputTokens": 2,
"latencyMs": 1148
},
{
"questionId": "q154",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1378
},
{
"questionId": "q154",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "42",
"correct": false,
"inputTokens": 8558,
"outputTokens": 2,
"latencyMs": 1364
},
{
"questionId": "q154",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1125
},
{
"questionId": "q154",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "104",
"correct": false,
"inputTokens": 15483,
"outputTokens": 2,
"latencyMs": 1276
},
{
"questionId": "q154",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 15366,
"outputTokens": 5,
"latencyMs": 1331
},
{
"questionId": "q154",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "77",
"correct": false,
"inputTokens": 13173,
"outputTokens": 2,
"latencyMs": 1534
},
{
"questionId": "q154",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "100",
"correct": true,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 1282
},
{
"questionId": "q155",
"format": "json",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "19",
"correct": false,
"inputTokens": 15189,
"outputTokens": 2,
"latencyMs": 2206
},
{
"questionId": "q155",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"correct": false,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 1568
},
{
"questionId": "q155",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "15",
"correct": false,
"inputTokens": 8790,
"outputTokens": 2,
"latencyMs": 1478
},
{
"questionId": "q155",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "42",
"correct": false,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1314
},
{
"questionId": "q155",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "12",
"correct": false,
"inputTokens": 8558,
"outputTokens": 2,
"latencyMs": 2149
},
{
"questionId": "q155",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "47",
"correct": false,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1485
},
{
"questionId": "q155",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "34",
"correct": false,
"inputTokens": 15483,
"outputTokens": 2,
"latencyMs": 1043
},
{
"questionId": "q155",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"correct": false,
"inputTokens": 15366,
"outputTokens": 5,
"latencyMs": 1371
},
{
"questionId": "q155",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "34",
"correct": false,
"inputTokens": 13173,
"outputTokens": 2,
"latencyMs": 1693
},
{
"questionId": "q155",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "71",
"correct": false,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 1237
},
{
"questionId": "q156",
"format": "json",
"model": "gpt-4o-mini",
"expected": "76",
"actual": "82",
"correct": false,
"inputTokens": 15189,
"outputTokens": 2,
"latencyMs": 927
},
{
"questionId": "q156",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 17408,
"outputTokens": 5,
"latencyMs": 1274
},
{
"questionId": "q156",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "76",
"actual": "34",
"correct": false,
"inputTokens": 8790,
"outputTokens": 2,
"latencyMs": 2541
},
{
"questionId": "q156",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 9278,
"outputTokens": 5,
"latencyMs": 1116
},
{
"questionId": "q156",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "76",
"actual": "34",
"correct": false,
"inputTokens": 8558,
"outputTokens": 2,
"latencyMs": 997
},
{
"questionId": "q156",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 9124,
"outputTokens": 5,
"latencyMs": 1513
},
{
"questionId": "q156",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "76",
"actual": "104",
"correct": false,
"inputTokens": 15483,
"outputTokens": 2,
"latencyMs": 3168
},
{
"questionId": "q156",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 15366,
"outputTokens": 5,
"latencyMs": 1498
},
{
"questionId": "q156",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "76",
"actual": "66",
"correct": false,
"inputTokens": 13173,
"outputTokens": 2,
"latencyMs": 1600
},
{
"questionId": "q156",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "76",
"actual": "100",
"correct": false,
"inputTokens": 14482,
"outputTokens": 5,
"latencyMs": 1519
},
{
"questionId": "q157",
"format": "json",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "77",
"correct": false,
"inputTokens": 15189,
"outputTokens": 2,
"latencyMs": 1809
},
{
"questionId": "q157",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "89",
"correct": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 1409
},
{
"questionId": "q157",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "66",
"correct": false,
"inputTokens": 8790,
"outputTokens": 2,
"latencyMs": 1367
},
{
"questionId": "q157",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "73",
"correct": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1296
},
{
"questionId": "q157",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "66",
"correct": false,
"inputTokens": 8558,
"outputTokens": 2,
"latencyMs": 1162
},
{
"questionId": "q157",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "89",
"correct": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 1435
},
{
"questionId": "q157",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "77",
"correct": false,
"inputTokens": 15483,
"outputTokens": 2,
"latencyMs": 1774
},
{
"questionId": "q157",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "95",
"correct": false,
"inputTokens": 15367,
"outputTokens": 5,
"latencyMs": 1479
},
{
"questionId": "q157",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "100",
"actual": "66",
"correct": false,
"inputTokens": 13173,
"outputTokens": 2,
"latencyMs": 2710
},
{
"questionId": "q157",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "100",
"actual": "95",
"correct": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 1272
},
{
"questionId": "q158",
"format": "json",
"model": "gpt-4o-mini",
"expected": "95",
"actual": "42",
"correct": false,
"inputTokens": 15189,
"outputTokens": 2,
"latencyMs": 3038
},
{
"questionId": "q158",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"correct": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 1562
},
{
"questionId": "q158",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "95",
"actual": "38",
"correct": false,
"inputTokens": 8790,
"outputTokens": 2,
"latencyMs": 1536
},
{
"questionId": "q158",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"correct": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1216
},
{
"questionId": "q158",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "95",
"actual": "34",
"correct": false,
"inputTokens": 8558,
"outputTokens": 2,
"latencyMs": 1760
},
{
"questionId": "q158",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "42",
"correct": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 1255
},
{
"questionId": "q158",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "95",
"actual": "66",
"correct": false,
"inputTokens": 15483,
"outputTokens": 2,
"latencyMs": 1683
},
{
"questionId": "q158",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "47",
"correct": false,
"inputTokens": 15367,
"outputTokens": 5,
"latencyMs": 2256
},
{
"questionId": "q158",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "95",
"actual": "38",
"correct": false,
"inputTokens": 13173,
"outputTokens": 2,
"latencyMs": 2831
},
{
"questionId": "q158",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "95",
"actual": "47",
"correct": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 1980
},
{
"questionId": "q159",
"format": "json",
"model": "gpt-4o-mini",
"expected": "83",
"actual": "66",
"correct": false,
"inputTokens": 15189,
"outputTokens": 2,
"latencyMs": 1327
},
{
"questionId": "q159",
"format": "json",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "71",
"correct": false,
"inputTokens": 17409,
"outputTokens": 5,
"latencyMs": 1894
},
{
"questionId": "q159",
"format": "toon",
"model": "gpt-4o-mini",
"expected": "83",
"actual": "34",
"correct": false,
"inputTokens": 8790,
"outputTokens": 2,
"latencyMs": 784
},
{
"questionId": "q159",
"format": "toon",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "73",
"correct": false,
"inputTokens": 9279,
"outputTokens": 5,
"latencyMs": 1422
},
{
"questionId": "q159",
"format": "csv",
"model": "gpt-4o-mini",
"expected": "83",
"actual": "34",
"correct": false,
"inputTokens": 8558,
"outputTokens": 2,
"latencyMs": 2644
},
{
"questionId": "q159",
"format": "csv",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "73",
"correct": false,
"inputTokens": 9125,
"outputTokens": 5,
"latencyMs": 1109
},
{
"questionId": "q159",
"format": "markdown-kv",
"model": "gpt-4o-mini",
"expected": "83",
"actual": "66",
"correct": false,
"inputTokens": 15483,
"outputTokens": 2,
"latencyMs": 1826
},
{
"questionId": "q159",
"format": "markdown-kv",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "71",
"correct": false,
"inputTokens": 15367,
"outputTokens": 5,
"latencyMs": 1342
},
{
"questionId": "q159",
"format": "yaml",
"model": "gpt-4o-mini",
"expected": "83",
"actual": "38",
"correct": false,
"inputTokens": 13173,
"outputTokens": 2,
"latencyMs": 2055
},
{
"questionId": "q159",
"format": "yaml",
"model": "claude-haiku-4-5",
"expected": "83",
"actual": "71",
"correct": false,
"inputTokens": 14483,
"outputTokens": 5,
"latencyMs": 1537
}
]