mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 15:24:10 +08:00
17493 lines
361 KiB
JSON
17493 lines
361 KiB
JSON
[
|
|
{
|
|
"questionId": "q1",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1313
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1346
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1191
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1399
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 3,
|
|
"latencyMs": 5010
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1472
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1667
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1507
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1325
|
|
},
|
|
{
|
|
"questionId": "q1",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "56176",
|
|
"actual": "56176",
|
|
"correct": true,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2280
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3167
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7869,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1267
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1402
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1290
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 2,
|
|
"latencyMs": 5070
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2855,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1320
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1745
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6364,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1191
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2713
|
|
},
|
|
{
|
|
"questionId": "q2",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5759,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1309
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6393,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1160
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 7874,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1338
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2530,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1478
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1563
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2384,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1310
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2860,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1236
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6319,
|
|
"outputTokens": 7,
|
|
"latencyMs": 2236
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6369,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1253
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5015,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1917
|
|
},
|
|
{
|
|
"questionId": "q3",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "lorenza.kunze@yahoo.com",
|
|
"actual": "lorenza.kunze@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5764,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1332
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2945
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1773
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1294
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 6,
|
|
"latencyMs": 980
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1747
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1197
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1039
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1453
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1056
|
|
},
|
|
{
|
|
"questionId": "q4",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "117381",
|
|
"actual": "117381",
|
|
"correct": true,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1564
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1263
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7868,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1097
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1248
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2980,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1486
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2854,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1019
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1287
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6363,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1243
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1339
|
|
},
|
|
{
|
|
"questionId": "q5",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5758,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1621
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1625
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1328
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1463
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1149
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1474
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 11,
|
|
"latencyMs": 977
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2079
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1134
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1124
|
|
},
|
|
{
|
|
"questionId": "q6",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayda60@hotmail.com",
|
|
"actual": "jayda60@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1053
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1427
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1246
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1171
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1547
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1523
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1148
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1360
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1100
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1116
|
|
},
|
|
{
|
|
"questionId": "q7",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "92971",
|
|
"actual": "92971",
|
|
"correct": true,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1202
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Operations",
|
|
"correct": false,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 2,
|
|
"latencyMs": 974
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1357
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1107
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1126
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1124
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1208
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Operations",
|
|
"correct": false,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1463
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1175
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1952
|
|
},
|
|
{
|
|
"questionId": "q8",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1271
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6393,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1301
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1371
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2530,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1197
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1088
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2384,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1310
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1300
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6319,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1531
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1275
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrence.hansen@yahoo.com",
|
|
"correct": false,
|
|
"inputTokens": 5015,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1245
|
|
},
|
|
{
|
|
"questionId": "q9",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "terrance.hansen@yahoo.com",
|
|
"actual": "terrance.hansen@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1215
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 6392,
|
|
"outputTokens": 3,
|
|
"latencyMs": 4959
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1269
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 2529,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1111
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1254
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 2383,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1616
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1123
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 6318,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1201
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1371
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 5014,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1503
|
|
},
|
|
{
|
|
"questionId": "q10",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "107744",
|
|
"actual": "107744",
|
|
"correct": true,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1249
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1383
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7869,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1081
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1677
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1072
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1142
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2855,
|
|
"outputTokens": 4,
|
|
"latencyMs": 991
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1339
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6364,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1117
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2483
|
|
},
|
|
{
|
|
"questionId": "q11",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5759,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1187
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1827
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 7867,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1121
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1373
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2979,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1284
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1751
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2853,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1140
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1624
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6362,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1071
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1970
|
|
},
|
|
{
|
|
"questionId": "q12",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "allan21@gmail.com",
|
|
"actual": "allan21@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5757,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1437
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 6389,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1263
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 7868,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1277
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 2526,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1151
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 2980,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1260
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 2380,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1071
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 2854,
|
|
"outputTokens": 6,
|
|
"latencyMs": 891
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 6315,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1548
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 6363,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1456
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 5011,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1268
|
|
},
|
|
{
|
|
"questionId": "q13",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "145843",
|
|
"actual": "145843",
|
|
"correct": true,
|
|
"inputTokens": 5758,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1205
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1310
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7868,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1071
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 2,
|
|
"latencyMs": 895
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2980,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1020
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1168
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2854,
|
|
"outputTokens": 4,
|
|
"latencyMs": 977
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Operations",
|
|
"correct": false,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1370
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6363,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1508
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3622
|
|
},
|
|
{
|
|
"questionId": "q14",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5758,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1249
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 7,
|
|
"latencyMs": 3269
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 7869,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1538
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1413
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1027
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1257
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2855,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1169
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1464
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6364,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1799
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1616
|
|
},
|
|
{
|
|
"questionId": "q15",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "alexandria61@gmail.com",
|
|
"actual": "alexandria61@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5759,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1349
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1298
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1115
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1180
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1110
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1235
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1228
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1832
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1401
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 3,
|
|
"latencyMs": 933
|
|
},
|
|
{
|
|
"questionId": "q16",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89436",
|
|
"actual": "89436",
|
|
"correct": true,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1570
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6393,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1221
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1293
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2530,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1147
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 4,
|
|
"latencyMs": 923
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2384,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1180
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1025
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6319,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1748
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1188
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5015,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1452
|
|
},
|
|
{
|
|
"questionId": "q17",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1329
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 6,
|
|
"latencyMs": 768
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1150
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1501
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1201
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1604
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1060
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1350
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1154
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1199
|
|
},
|
|
{
|
|
"questionId": "q18",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "kelvin54@yahoo.com",
|
|
"actual": "kelvin54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1216
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1412
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1908
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1366
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1054
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1121
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1262
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 3,
|
|
"latencyMs": 4632
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1118
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 928
|
|
},
|
|
{
|
|
"questionId": "q19",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "143365",
|
|
"actual": "143365",
|
|
"correct": true,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1191
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1053
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7868,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1096
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1784
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2980,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1093
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1335
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2854,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1546
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1293
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6363,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1230
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1467
|
|
},
|
|
{
|
|
"questionId": "q20",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5758,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1370
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6394,
|
|
"outputTokens": 6,
|
|
"latencyMs": 5026
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 7876,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1786
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2531,
|
|
"outputTokens": 6,
|
|
"latencyMs": 826
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2988,
|
|
"outputTokens": 9,
|
|
"latencyMs": 909
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2385,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1120
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2862,
|
|
"outputTokens": 9,
|
|
"latencyMs": 996
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6320,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1639
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6371,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1299
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5016,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1151
|
|
},
|
|
{
|
|
"questionId": "q21",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "dean19@gmail.com",
|
|
"actual": "dean19@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5766,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1246
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 6392,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1838
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1191
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 2529,
|
|
"outputTokens": 3,
|
|
"latencyMs": 980
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1299
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 2383,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1027
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1433
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 6318,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2256
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1091
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 5014,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1288
|
|
},
|
|
{
|
|
"questionId": "q22",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111314",
|
|
"actual": "111314",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1306
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6389,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1951
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7868,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1440
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2526,
|
|
"outputTokens": 2,
|
|
"latencyMs": 978
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2980,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1385
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2380,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2311
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2854,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1066
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6315,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1914
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6363,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1596
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5011,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1820
|
|
},
|
|
{
|
|
"questionId": "q23",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5758,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1067
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2594
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 7869,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1139
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1225
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1082
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 6,
|
|
"latencyMs": 4857
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2855,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1082
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1272
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6364,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1201
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1197
|
|
},
|
|
{
|
|
"questionId": "q24",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "laurel54@yahoo.com",
|
|
"actual": "laurel54@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5759,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1198
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 6392,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1085
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 7873,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1102
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 2529,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1350
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1300
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 2383,
|
|
"outputTokens": 3,
|
|
"latencyMs": 998
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 2859,
|
|
"outputTokens": 6,
|
|
"latencyMs": 972
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 6318,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1331
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 6368,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1027
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 5014,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1170
|
|
},
|
|
{
|
|
"questionId": "q25",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89553",
|
|
"actual": "89553",
|
|
"correct": true,
|
|
"inputTokens": 5763,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1074
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6389,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1862
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7866,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1435
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2526,
|
|
"outputTokens": 2,
|
|
"latencyMs": 989
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2978,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1035
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2380,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2157
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2852,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1094
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6315,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1912
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6361,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1364
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5011,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1435
|
|
},
|
|
{
|
|
"questionId": "q26",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5756,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1082
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6392,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1274
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1130
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2529,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1795
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1309
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2383,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1406
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1398
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6318,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1114
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1251
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5014,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1941
|
|
},
|
|
{
|
|
"questionId": "q27",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "jayme.kertzmann77@gmail.com",
|
|
"actual": "jayme.kertzmann77@gmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1218
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1395
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1342
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 3,
|
|
"latencyMs": 919
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1187
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1131
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1191
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1435
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1095
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 4588
|
|
},
|
|
{
|
|
"questionId": "q28",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "104053",
|
|
"actual": "104053",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1291
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6392,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1688
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1301
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2529,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1914
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1447
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2383,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1725
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 4,
|
|
"latencyMs": 923
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6318,
|
|
"outputTokens": 2,
|
|
"latencyMs": 879
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1322
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5014,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1394
|
|
},
|
|
{
|
|
"questionId": "q29",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1008
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 7,
|
|
"latencyMs": 894
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 7869,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1220
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 7,
|
|
"latencyMs": 2225
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1282
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1414
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2855,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1686
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1113
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6364,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1089
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 7,
|
|
"latencyMs": 949
|
|
},
|
|
{
|
|
"questionId": "q30",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "carley.bauch@yahoo.com",
|
|
"actual": "carley.bauch@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5759,
|
|
"outputTokens": 12,
|
|
"latencyMs": 1273
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 6394,
|
|
"outputTokens": 3,
|
|
"latencyMs": 4741
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 7874,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1132
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 2531,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1184
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1137
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 2385,
|
|
"outputTokens": 3,
|
|
"latencyMs": 963
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 2860,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1096
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 6320,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1399
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 6369,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1594
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 5016,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1900
|
|
},
|
|
{
|
|
"questionId": "q31",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "142029",
|
|
"actual": "142029",
|
|
"correct": true,
|
|
"inputTokens": 5764,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1274
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Sales",
|
|
"correct": false,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 2,
|
|
"latencyMs": 5224
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7869,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1038
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1902
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1010
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3263
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2855,
|
|
"outputTokens": 4,
|
|
"latencyMs": 871
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Sales",
|
|
"correct": false,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1278
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6364,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1048
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Sales",
|
|
"correct": false,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1271
|
|
},
|
|
{
|
|
"questionId": "q32",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5759,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1075
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6394,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1139
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1319
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2531,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1856
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1393
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1766
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1609
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6320,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1329
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1178
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5016,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1890
|
|
},
|
|
{
|
|
"questionId": "q33",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cheyenne_skiles@hotmail.com",
|
|
"actual": "cheyenne_skiles@hotmail.com",
|
|
"correct": true,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1326
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 6392,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1898
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1074
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 2529,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1382
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1060
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 2383,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1286
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1591
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 6318,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2158
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1532
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 5014,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1381
|
|
},
|
|
{
|
|
"questionId": "q34",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "84650",
|
|
"actual": "84650",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2262
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2664
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1260
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1563
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1415
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1038
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1021
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 2,
|
|
"latencyMs": 4276
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1301
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1399
|
|
},
|
|
{
|
|
"questionId": "q35",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1197
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1390
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 7869,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1482
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1754
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1100
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1421
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2855,
|
|
"outputTokens": 14,
|
|
"latencyMs": 2173
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 9,
|
|
"latencyMs": 2911
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6364,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1235
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1303
|
|
},
|
|
{
|
|
"questionId": "q36",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "macey.gottlieb5@yahoo.com",
|
|
"actual": "macey.gottlieb5@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5759,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1148
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1430
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 7868,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1089
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1059
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 2980,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1057
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1716
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 2854,
|
|
"outputTokens": 6,
|
|
"latencyMs": 904
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2950
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 6363,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1189
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1050
|
|
},
|
|
{
|
|
"questionId": "q37",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "89773",
|
|
"actual": "89773",
|
|
"correct": true,
|
|
"inputTokens": 5758,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1329
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3410
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 7868,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1891
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1010
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2980,
|
|
"outputTokens": 4,
|
|
"latencyMs": 988
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1364
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 2854,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1395
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2293
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 6363,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1137
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1451
|
|
},
|
|
{
|
|
"questionId": "q38",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Marketing",
|
|
"actual": "Marketing",
|
|
"correct": true,
|
|
"inputTokens": 5758,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1100
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6390,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1674
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 7869,
|
|
"outputTokens": 13,
|
|
"latencyMs": 1403
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2527,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1413
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 13,
|
|
"latencyMs": 1200
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2381,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1730
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 2855,
|
|
"outputTokens": 13,
|
|
"latencyMs": 1226
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6316,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1251
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 6364,
|
|
"outputTokens": 13,
|
|
"latencyMs": 1337
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5012,
|
|
"outputTokens": 10,
|
|
"latencyMs": 2368
|
|
},
|
|
{
|
|
"questionId": "q39",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "georgianna_renner@yahoo.com",
|
|
"actual": "georgianna_renner@yahoo.com",
|
|
"correct": true,
|
|
"inputTokens": 5759,
|
|
"outputTokens": 13,
|
|
"latencyMs": 1251
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 6391,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3815
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 7871,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1169
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 2528,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1070
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1162
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 2382,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1115
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "49741",
|
|
"actual": "144426",
|
|
"correct": false,
|
|
"inputTokens": 2857,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1365
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 6317,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2004
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 6366,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1113
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 5013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3055
|
|
},
|
|
{
|
|
"questionId": "q40",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "49741",
|
|
"actual": "49741",
|
|
"correct": true,
|
|
"inputTokens": 5761,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1392
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6388,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3877
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 7865,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1128
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2525,
|
|
"outputTokens": 2,
|
|
"latencyMs": 966
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2977,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1070
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2379,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2411
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2851,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1286
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6314,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2082
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 6360,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1107
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 5010,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1216
|
|
},
|
|
{
|
|
"questionId": "q41",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 5755,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1052
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6388,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1572
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 7865,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1084
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2525,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1377
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "14",
|
|
"correct": false,
|
|
"inputTokens": 2977,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1197
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2379,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2705
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2851,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1020
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6314,
|
|
"outputTokens": 2,
|
|
"latencyMs": 5345
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "14",
|
|
"correct": false,
|
|
"inputTokens": 6360,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1207
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 5010,
|
|
"outputTokens": 2,
|
|
"latencyMs": 921
|
|
},
|
|
{
|
|
"questionId": "q42",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 5755,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1289
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6388,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2423
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 7865,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1273
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2525,
|
|
"outputTokens": 2,
|
|
"latencyMs": 975
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2977,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1301
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2379,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1423
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2851,
|
|
"outputTokens": 5,
|
|
"latencyMs": 927
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6314,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1258
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 6360,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1250
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 5010,
|
|
"outputTokens": 2,
|
|
"latencyMs": 872
|
|
},
|
|
{
|
|
"questionId": "q43",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 5755,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1385
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6388,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1201
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 7865,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1149
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2525,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1498
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2977,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1149
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2379,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1098
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2851,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1121
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6314,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2522
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "10",
|
|
"correct": false,
|
|
"inputTokens": 6360,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1532
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "17",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 5010,
|
|
"outputTokens": 2,
|
|
"latencyMs": 4914
|
|
},
|
|
{
|
|
"questionId": "q44",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "17",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 5755,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1324
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6388,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1446
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 7865,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1105
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2525,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1297
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2977,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1251
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2379,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1561
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2851,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1292
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6314,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1127
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 6360,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1207
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 5010,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1582
|
|
},
|
|
{
|
|
"questionId": "q45",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 5755,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1278
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6388,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1278
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "10",
|
|
"correct": false,
|
|
"inputTokens": 7865,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3084
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2525,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1289
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2977,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1591
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2379,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3038
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 2851,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1447
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 6314,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1224
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "10",
|
|
"correct": false,
|
|
"inputTokens": 6360,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1250
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "16",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 5010,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1364
|
|
},
|
|
{
|
|
"questionId": "q46",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "16",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 5755,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1560
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "91",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 6393,
|
|
"outputTokens": 2,
|
|
"latencyMs": 989
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "91",
|
|
"actual": "89",
|
|
"correct": false,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1358
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "91",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 2530,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1406
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "91",
|
|
"actual": "85",
|
|
"correct": false,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1123
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "91",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 2384,
|
|
"outputTokens": 2,
|
|
"latencyMs": 4883
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "91",
|
|
"actual": "85",
|
|
"correct": false,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1402
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "91",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 6319,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1915
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "91",
|
|
"actual": "89",
|
|
"correct": false,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1263
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "91",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 5015,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1448
|
|
},
|
|
{
|
|
"questionId": "q47",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "91",
|
|
"actual": "89",
|
|
"correct": false,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1243
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "67",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 6393,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1456
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "67",
|
|
"actual": "57",
|
|
"correct": false,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1186
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "67",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 2530,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1076
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "67",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1168
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "67",
|
|
"actual": "56",
|
|
"correct": false,
|
|
"inputTokens": 2384,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3105
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "67",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1375
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "67",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 6319,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1618
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "67",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1454
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "67",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 5015,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1244
|
|
},
|
|
{
|
|
"questionId": "q48",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "67",
|
|
"actual": "57",
|
|
"correct": false,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1113
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "30",
|
|
"correct": false,
|
|
"inputTokens": 6393,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1267
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "31",
|
|
"correct": false,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1227
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "30",
|
|
"correct": false,
|
|
"inputTokens": 2530,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1246
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "27",
|
|
"correct": false,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1127
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 2384,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1260
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "31",
|
|
"correct": false,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1293
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 6319,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1246
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "27",
|
|
"correct": false,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1598
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 5015,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1471
|
|
},
|
|
{
|
|
"questionId": "q49",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "31",
|
|
"correct": false,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "26",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 6393,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3950
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "26",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 7870,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1075
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "26",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 2530,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1868
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "26",
|
|
"actual": "16",
|
|
"correct": false,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1075
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "26",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 2384,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1973
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "26",
|
|
"actual": "16",
|
|
"correct": false,
|
|
"inputTokens": 2856,
|
|
"outputTokens": 5,
|
|
"latencyMs": 947
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "26",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 6319,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1414
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "26",
|
|
"actual": "16",
|
|
"correct": false,
|
|
"inputTokens": 6365,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1221
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "26",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 5015,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1148
|
|
},
|
|
{
|
|
"questionId": "q50",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "26",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 5760,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1286
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "78",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 6387,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2525
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "78",
|
|
"actual": "81",
|
|
"correct": false,
|
|
"inputTokens": 7864,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1613
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "78",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 2524,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1132
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "78",
|
|
"actual": "78",
|
|
"correct": true,
|
|
"inputTokens": 2976,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1104
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "78",
|
|
"actual": "77",
|
|
"correct": false,
|
|
"inputTokens": 2378,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1069
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "78",
|
|
"actual": "73",
|
|
"correct": false,
|
|
"inputTokens": 2850,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1113
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "78",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 6313,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1999
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "78",
|
|
"actual": "78",
|
|
"correct": true,
|
|
"inputTokens": 6359,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1214
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "78",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 5009,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1613
|
|
},
|
|
{
|
|
"questionId": "q51",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "78",
|
|
"actual": "77",
|
|
"correct": false,
|
|
"inputTokens": 5754,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1012
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "22",
|
|
"actual": "30",
|
|
"correct": false,
|
|
"inputTokens": 6387,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1580
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "22",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 7864,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1688
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "22",
|
|
"actual": "22",
|
|
"correct": true,
|
|
"inputTokens": 2524,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1290
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "22",
|
|
"actual": "16",
|
|
"correct": false,
|
|
"inputTokens": 2976,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1121
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "22",
|
|
"actual": "10",
|
|
"correct": false,
|
|
"inputTokens": 2378,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1544
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "22",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 2850,
|
|
"outputTokens": 5,
|
|
"latencyMs": 822
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "22",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 6313,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2718
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "22",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 6359,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1211
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "22",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 5009,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1162
|
|
},
|
|
{
|
|
"questionId": "q52",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "22",
|
|
"actual": "16",
|
|
"correct": false,
|
|
"inputTokens": 5754,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1156
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 6395,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1089
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "9",
|
|
"correct": false,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1368
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 2532,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1850
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "9",
|
|
"correct": false,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 5,
|
|
"latencyMs": 914
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 2386,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1156
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "10",
|
|
"correct": false,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1118
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 6321,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1020
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1021
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 5017,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1236
|
|
},
|
|
{
|
|
"questionId": "q53",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "10",
|
|
"correct": false,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1574
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 6395,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1437
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1091
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 2532,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1917
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "6",
|
|
"correct": false,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1095
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 2386,
|
|
"outputTokens": 2,
|
|
"latencyMs": 4230
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1187
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 6321,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1197
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "6",
|
|
"correct": false,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1176
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 5017,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1249
|
|
},
|
|
{
|
|
"questionId": "q54",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1383
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "30",
|
|
"correct": false,
|
|
"inputTokens": 6395,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1149
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1072
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 2532,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1213
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1507
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 2386,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1826
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1162
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 6321,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1008
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1285
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 5017,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1124
|
|
},
|
|
{
|
|
"questionId": "q55",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "9",
|
|
"correct": false,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1212
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 6395,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1232
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1792
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "12",
|
|
"correct": true,
|
|
"inputTokens": 2532,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1357
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "6",
|
|
"correct": false,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1247
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 2386,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1043
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1065
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "10",
|
|
"correct": false,
|
|
"inputTokens": 6321,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1298
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1767
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "12",
|
|
"actual": "10",
|
|
"correct": false,
|
|
"inputTokens": 5017,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3525
|
|
},
|
|
{
|
|
"questionId": "q56",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "12",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1355
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "62",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 6394,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1359
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "62",
|
|
"actual": "62",
|
|
"correct": true,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1447
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "62",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 2531,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3832
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "62",
|
|
"actual": "62",
|
|
"correct": true,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1143
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "62",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 2385,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1370
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "62",
|
|
"actual": "62",
|
|
"correct": true,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1042
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "62",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 6320,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1015
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "62",
|
|
"actual": "62",
|
|
"correct": true,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1395
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "62",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 5016,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1008
|
|
},
|
|
{
|
|
"questionId": "q57",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "62",
|
|
"actual": "62",
|
|
"correct": true,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1191
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "45",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 6394,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1304
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "45",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 7872,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1386
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "45",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 2531,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1433
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "45",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 2984,
|
|
"outputTokens": 5,
|
|
"latencyMs": 967
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "45",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 2385,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2469
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "45",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 2858,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1382
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "45",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 6320,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1658
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "45",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 6367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1450
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "45",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 5016,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1428
|
|
},
|
|
{
|
|
"questionId": "q58",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "45",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 5762,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1144
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1577
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1181
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1231
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1407
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1393
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1534
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1456
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1933
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1472
|
|
},
|
|
{
|
|
"questionId": "q59",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "96.17",
|
|
"actual": "96.17",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1224
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2069
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1172
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1236
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1157
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1364
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1041
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1478
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1266
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3477
|
|
},
|
|
{
|
|
"questionId": "q60",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 2630
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1479
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1270
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1270
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1342
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1350
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1205
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1502
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1571
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 2013
|
|
},
|
|
{
|
|
"questionId": "q61",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "599.39",
|
|
"actual": "599.39",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1428
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1666
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1549
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1033
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1061
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2008
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1214
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1321
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1769
|
|
},
|
|
{
|
|
"questionId": "q62",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1157
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1213
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1332
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 3749
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1326
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 947
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1251
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1428
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1659
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 5584
|
|
},
|
|
{
|
|
"questionId": "q63",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "528.71",
|
|
"actual": "528.71",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1251
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2425
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1481
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1109
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1048
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1256
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1117
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1168
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1504
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1134
|
|
},
|
|
{
|
|
"questionId": "q64",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1059
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2361
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1158
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1493
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1068
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1490
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1386
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1470
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1189
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2824
|
|
},
|
|
{
|
|
"questionId": "q65",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1687.82",
|
|
"actual": "1687.82",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1565
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1480
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1354
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 5334
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1158
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2043
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1302
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1006
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1106
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1801
|
|
},
|
|
{
|
|
"questionId": "q66",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1626
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 2107
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1183
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 7091
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1730
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1222
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1447
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 10295
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1228
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1748
|
|
},
|
|
{
|
|
"questionId": "q67",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "423.6",
|
|
"actual": "423.6",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1373
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3836
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1297
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1927
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1171
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1551
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1273
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1387
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1237
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1934
|
|
},
|
|
{
|
|
"questionId": "q68",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1132
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 2267
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1772
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1315
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1165
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1097
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1299
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1779
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 3153
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1813
|
|
},
|
|
{
|
|
"questionId": "q69",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "784.03",
|
|
"actual": "784.03",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1867
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1611
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1173
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1977
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1108
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1324
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1225
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1416
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1200
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1259
|
|
},
|
|
{
|
|
"questionId": "q70",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "shipped",
|
|
"actual": "shipped",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1433
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1729
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1143
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1837
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1147
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1777
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1295
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1081
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1692
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1661
|
|
},
|
|
{
|
|
"questionId": "q71",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "645.88",
|
|
"actual": "645.88",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1475
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2979
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1187
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1620
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1532
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1616
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1435
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1190
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1414
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2335
|
|
},
|
|
{
|
|
"questionId": "q72",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "processing",
|
|
"actual": "processing",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1308
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 3359
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1227
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1439
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1179
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1064
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1144
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1873
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1302
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1956
|
|
},
|
|
{
|
|
"questionId": "q73",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "371.91",
|
|
"actual": "371.91",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1281
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1591
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1279
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3152
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1061
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1557
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1313
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1433
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1812
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1024
|
|
},
|
|
{
|
|
"questionId": "q74",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "pending",
|
|
"actual": "pending",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1243
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1500
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1275
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1841
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1080
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1209
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1308
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1556
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1240
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1254
|
|
},
|
|
{
|
|
"questionId": "q75",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1066",
|
|
"actual": "1066",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1305
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2606
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1422
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2688
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1041
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3070
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1167
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1702
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1182
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1740
|
|
},
|
|
{
|
|
"questionId": "q76",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "cancelled",
|
|
"actual": "cancelled",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1404
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1596
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 8,
|
|
"latencyMs": 2314
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1114
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1289
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2428
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1325
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1343
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1783
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 5,
|
|
"latencyMs": 918
|
|
},
|
|
{
|
|
"questionId": "q77",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1697.4",
|
|
"actual": "1697.4",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1308
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1396
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 11906,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1225
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2294
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 6992,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1418
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1613
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 8413,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1374
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1341
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 9288,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1223
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2230
|
|
},
|
|
{
|
|
"questionId": "q78",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "delivered",
|
|
"actual": "delivered",
|
|
"correct": true,
|
|
"inputTokens": 8384,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1425
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1377
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1550
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1394
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1202
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1435
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1277
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1564
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1200
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1596
|
|
},
|
|
{
|
|
"questionId": "q79",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Valerie Braun",
|
|
"actual": "Valerie Braun",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1151
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1458
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1283
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 5,
|
|
"latencyMs": 4702
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1360
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 5,
|
|
"latencyMs": 6167
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1449
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 5,
|
|
"latencyMs": 6096
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1194
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 5,
|
|
"latencyMs": 7357
|
|
},
|
|
{
|
|
"questionId": "q80",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Anita Kozey",
|
|
"actual": "Anita Kozey",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1213
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2539
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1532
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2960
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1547
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1358
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1424
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 6,
|
|
"latencyMs": 958
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1381
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1372
|
|
},
|
|
{
|
|
"questionId": "q81",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elmer Kub PhD",
|
|
"actual": "Elmer Kub PhD",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1715
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1972
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1315
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1634
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1264
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1153
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1252
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1697
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1198
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1854
|
|
},
|
|
{
|
|
"questionId": "q82",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Maxine Zemlak",
|
|
"actual": "Maxine Zemlak",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1752
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2076
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1398
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2263
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 3101
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1453
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1265
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 5,
|
|
"latencyMs": 8807
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1097
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1667
|
|
},
|
|
{
|
|
"questionId": "q83",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Emanuel Littel",
|
|
"actual": "Emanuel Littel",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1198
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2292
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1202
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1801
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1287
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1340
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1163
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2685
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1397
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1289
|
|
},
|
|
{
|
|
"questionId": "q84",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Andrew Kling",
|
|
"actual": "Andrew Kling",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1155
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1601
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1340
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 6,
|
|
"latencyMs": 3525
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1710
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2333
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1168
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1781
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1552
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1584
|
|
},
|
|
{
|
|
"questionId": "q85",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Morris O'Hara",
|
|
"actual": "Morris O'Hara",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1548
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 6,
|
|
"latencyMs": 7230
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1933
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1067
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1288
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 6,
|
|
"latencyMs": 3954
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1314
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1334
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 9,
|
|
"latencyMs": 2441
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1650
|
|
},
|
|
{
|
|
"questionId": "q86",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Elijah Franecki",
|
|
"actual": "Elijah Franecki",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1495
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1262
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1367
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1385
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1313
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1141
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1300
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3347
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1457
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1276
|
|
},
|
|
{
|
|
"questionId": "q87",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Malcolm Erdman",
|
|
"actual": "Malcolm Erdman",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1211
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1635
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1582
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1695
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1318
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 5,
|
|
"latencyMs": 936
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1204
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 5,
|
|
"latencyMs": 996
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1261
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2276
|
|
},
|
|
{
|
|
"questionId": "q88",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Fannie Skiles",
|
|
"actual": "Fannie Skiles",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1380
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1451
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1977
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1376
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1250
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1273
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1359
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1791
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1273
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2832
|
|
},
|
|
{
|
|
"questionId": "q89",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Sonja Emmerich",
|
|
"actual": "Sonja Emmerich",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1172
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1491
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1414
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1396
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1514
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1573
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1284
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 7,
|
|
"latencyMs": 5400
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1486
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1420
|
|
},
|
|
{
|
|
"questionId": "q90",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Frank Emmerich DVM",
|
|
"actual": "Frank Emmerich DVM",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 10,
|
|
"latencyMs": 1410
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1248
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1177
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1601
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1822
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1103
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1247
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1184
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1137
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 949
|
|
},
|
|
{
|
|
"questionId": "q91",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Ronald Collins",
|
|
"actual": "Ronald Collins",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1143
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1021
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1301
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1254
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1375
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1316
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 2681
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 4,
|
|
"latencyMs": 2427
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1526
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1252
|
|
},
|
|
{
|
|
"questionId": "q92",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Jeannie Klein",
|
|
"actual": "Jeannie Klein",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1324
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 9740,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1606
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 11907,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1223
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 6014,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1965
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 6993,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1300
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 6782,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1110
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 8414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1819
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 9159,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1010
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 9289,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1224
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 7374,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1430
|
|
},
|
|
{
|
|
"questionId": "q93",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "Joshua Watsica",
|
|
"actual": "Joshua Watsica",
|
|
"correct": true,
|
|
"inputTokens": 8385,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1158
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9736,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1352
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 11902,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1498
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 6010,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1249
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 6988,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1080
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 6778,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1760
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 8409,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1156
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9155,
|
|
"outputTokens": 2,
|
|
"latencyMs": 9923
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 9284,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1138
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 7370,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1070
|
|
},
|
|
{
|
|
"questionId": "q94",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 8380,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1114
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9736,
|
|
"outputTokens": 2,
|
|
"latencyMs": 830
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 11902,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1085
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 6010,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2362
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 6988,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1198
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 6778,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1630
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 8409,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1219
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9155,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2666
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 9284,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1044
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 7370,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2187
|
|
},
|
|
{
|
|
"questionId": "q95",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 8380,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1313
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 9737,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1087
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 11902,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1292
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 6011,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1979
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 6988,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1095
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 6779,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1385
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 8409,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1507
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9156,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1579
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 9284,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1365
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 7371,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1661
|
|
},
|
|
{
|
|
"questionId": "q96",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "7",
|
|
"correct": false,
|
|
"inputTokens": 8380,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1423
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 9737,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1815
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 11902,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1345
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 6011,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2193
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 6988,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1417
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 6779,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1721
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 8409,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1114
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 9156,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2208
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9284,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1895
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 7371,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1287
|
|
},
|
|
{
|
|
"questionId": "q97",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 8380,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1281
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9737,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1387
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 11902,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1243
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 6011,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1284
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 6988,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1161
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 6779,
|
|
"outputTokens": 2,
|
|
"latencyMs": 10406
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 8409,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1335
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9156,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1517
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 9284,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1702
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "10",
|
|
"actual": "10",
|
|
"correct": true,
|
|
"inputTokens": 7371,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1676
|
|
},
|
|
{
|
|
"questionId": "q98",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "10",
|
|
"actual": "8",
|
|
"correct": false,
|
|
"inputTokens": 8380,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1218
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42342.25",
|
|
"actual": "$50,000.00",
|
|
"correct": false,
|
|
"inputTokens": 9737,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1407
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42342.25",
|
|
"actual": "50,847.47",
|
|
"correct": false,
|
|
"inputTokens": 11902,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1443
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42342.25",
|
|
"actual": "Total revenue across all orders is 42,195.36.",
|
|
"correct": false,
|
|
"inputTokens": 6011,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1150
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42342.25",
|
|
"actual": "41,847.47",
|
|
"correct": false,
|
|
"inputTokens": 6988,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1774
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42342.25",
|
|
"actual": "$32,186.73",
|
|
"correct": false,
|
|
"inputTokens": 6779,
|
|
"outputTokens": 7,
|
|
"latencyMs": 2654
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42342.25",
|
|
"actual": "48,847.47",
|
|
"correct": false,
|
|
"inputTokens": 8409,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1386
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42342.25",
|
|
"actual": "$34,186.73",
|
|
"correct": false,
|
|
"inputTokens": 9156,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1506
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42342.25",
|
|
"actual": "48,847.47",
|
|
"correct": false,
|
|
"inputTokens": 9284,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1509
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42342.25",
|
|
"actual": "Total revenue across all orders is 48780.73.",
|
|
"correct": false,
|
|
"inputTokens": 7371,
|
|
"outputTokens": 13,
|
|
"latencyMs": 1700
|
|
},
|
|
{
|
|
"questionId": "q99",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42342.25",
|
|
"actual": "47,847.47",
|
|
"correct": false,
|
|
"inputTokens": 8380,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1230
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "44",
|
|
"actual": "36",
|
|
"correct": false,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1725
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "44",
|
|
"actual": "48",
|
|
"correct": false,
|
|
"inputTokens": 11904,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1377
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "44",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1399
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "44",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 6990,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1094
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "44",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1617
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "44",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 8411,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1344
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "44",
|
|
"actual": "36",
|
|
"correct": false,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2396
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "44",
|
|
"actual": "48",
|
|
"correct": false,
|
|
"inputTokens": 9286,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1145
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "44",
|
|
"actual": "36",
|
|
"correct": false,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 2,
|
|
"latencyMs": 951
|
|
},
|
|
{
|
|
"questionId": "q100",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "44",
|
|
"actual": "45",
|
|
"correct": false,
|
|
"inputTokens": 8382,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "39",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 2,
|
|
"latencyMs": 866
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "39",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 11904,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1964
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "39",
|
|
"actual": "30",
|
|
"correct": false,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1994
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "39",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 6990,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1277
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "39",
|
|
"actual": "32",
|
|
"correct": false,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1884
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "39",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 8411,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1282
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "39",
|
|
"actual": "32",
|
|
"correct": false,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1761
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "39",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 9286,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1250
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "39",
|
|
"actual": "32",
|
|
"correct": false,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1316
|
|
},
|
|
{
|
|
"questionId": "q101",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "39",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 8382,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1373
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32",
|
|
"actual": "27",
|
|
"correct": false,
|
|
"inputTokens": 9739,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1389
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32",
|
|
"actual": "28",
|
|
"correct": false,
|
|
"inputTokens": 11904,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1215
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 6013,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1034
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32",
|
|
"actual": "26",
|
|
"correct": false,
|
|
"inputTokens": 6990,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1063
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32",
|
|
"actual": "25",
|
|
"correct": false,
|
|
"inputTokens": 6781,
|
|
"outputTokens": 2,
|
|
"latencyMs": 7312
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32",
|
|
"actual": "28",
|
|
"correct": false,
|
|
"inputTokens": 8411,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1387
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32",
|
|
"actual": "27",
|
|
"correct": false,
|
|
"inputTokens": 9158,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1488
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32",
|
|
"actual": "28",
|
|
"correct": false,
|
|
"inputTokens": 9286,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1268
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32",
|
|
"actual": "27",
|
|
"correct": false,
|
|
"inputTokens": 7373,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1274
|
|
},
|
|
{
|
|
"questionId": "q102",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32",
|
|
"actual": "26",
|
|
"correct": false,
|
|
"inputTokens": 8382,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1354
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1330
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1437
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1341
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1231
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2515
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1162
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 868
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1149
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1183
|
|
},
|
|
{
|
|
"questionId": "q103",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6975",
|
|
"actual": "6975",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1119
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1273
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1371
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2052
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 997
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1152
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1188
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1259
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1239
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1096
|
|
},
|
|
{
|
|
"questionId": "q104",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6686.23",
|
|
"actual": "6686.23",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1247
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1354
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1083
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 869
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1051
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1528
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1126
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1136
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1121
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1217
|
|
},
|
|
{
|
|
"questionId": "q105",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "7500",
|
|
"actual": "7500",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1099
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1416
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1526
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1350
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1330
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2337
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1171
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3128
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1151
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1988
|
|
},
|
|
{
|
|
"questionId": "q106",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "14297.05",
|
|
"actual": "14297.05",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1166
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2217
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1114
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1360
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1079
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1951
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1173
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1076
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1098
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1101
|
|
},
|
|
{
|
|
"questionId": "q107",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6692",
|
|
"actual": "6692",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1254
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2041
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1405
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1170
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1161
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1326
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1259
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3006
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1461
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3824
|
|
},
|
|
{
|
|
"questionId": "q108",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "9302.76",
|
|
"actual": "9302.76",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1391
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1091
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1188
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1450
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1614
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1642
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1201
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1261
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 856
|
|
},
|
|
{
|
|
"questionId": "q109",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3285",
|
|
"actual": "3285",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 980
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3090
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1123
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2911
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 979
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1118
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 943
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2639
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1187
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2402
|
|
},
|
|
{
|
|
"questionId": "q110",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3826.93",
|
|
"actual": "3826.93",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1723
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2401
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1117
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1568
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1132
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1478
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1831
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1631
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1371
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1209
|
|
},
|
|
{
|
|
"questionId": "q111",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6191",
|
|
"actual": "6191",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1411
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1773
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1090
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1354
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1095
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1135
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 976
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1287
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1288
|
|
},
|
|
{
|
|
"questionId": "q112",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1854.66",
|
|
"actual": "1854.66",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1157
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1328
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1068
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1020
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1069
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 968
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1436
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1171
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1273
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1788
|
|
},
|
|
{
|
|
"questionId": "q113",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4696",
|
|
"actual": "4696",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1050
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1414
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1192
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 893
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1065
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1155
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1842
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2740
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1295
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1053
|
|
},
|
|
{
|
|
"questionId": "q114",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4211.6",
|
|
"actual": "4211.6",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1118
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1452
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1272
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1039
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1155
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 796
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1048
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2282
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1592
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2691
|
|
},
|
|
{
|
|
"questionId": "q115",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6196",
|
|
"actual": "6196",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1126
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1288
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6105.3",
|
|
"actual": "6105.30",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 991
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1257
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1004
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1620
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 991
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1048
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1189
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3282
|
|
},
|
|
{
|
|
"questionId": "q116",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6105.3",
|
|
"actual": "6105.3",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 985
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 871
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1042
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 999
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1111
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1132
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1004
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1162
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1271
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 961
|
|
},
|
|
{
|
|
"questionId": "q117",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6528",
|
|
"actual": "6528",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1289
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1634
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1198
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2678
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1155
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1104
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1109
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3756
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1082
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1451
|
|
},
|
|
{
|
|
"questionId": "q118",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1136.09",
|
|
"actual": "1136.09",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1730
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1327
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1282
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1368
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1487
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2752
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 909
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3502
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1212
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1218
|
|
},
|
|
{
|
|
"questionId": "q119",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "4689",
|
|
"actual": "4689",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1064
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2777
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1246
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1424
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1074
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2803
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1107
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1066
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1325
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1330
|
|
},
|
|
{
|
|
"questionId": "q120",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "2637.73",
|
|
"actual": "2637.73",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1192
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 3713,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1139
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 4080,
|
|
"outputTokens": 6,
|
|
"latencyMs": 994
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 1564,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1309
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 1509,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1184
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1182
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 1445,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1381
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 3830,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1103
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 3415,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1220
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 2986,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1169
|
|
},
|
|
{
|
|
"questionId": "q121",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "5685",
|
|
"actual": "5685",
|
|
"correct": true,
|
|
"inputTokens": 3110,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1208
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 3712,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1037
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 4079,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1278
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 1563,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1441
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 1508,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1204
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 1441,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1782
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 1444,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1088
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 3829,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1447
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 3414,
|
|
"outputTokens": 8,
|
|
"latencyMs": 1356
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 2985,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1309
|
|
},
|
|
{
|
|
"questionId": "q122",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "3421.06",
|
|
"actual": "3421.06",
|
|
"correct": true,
|
|
"inputTokens": 3109,
|
|
"outputTokens": 8,
|
|
"latencyMs": 995
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "344498",
|
|
"actual": "188,000",
|
|
"correct": false,
|
|
"inputTokens": 3710,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1405
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "344498",
|
|
"actual": "188,945",
|
|
"correct": false,
|
|
"inputTokens": 4077,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1110
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "344498",
|
|
"actual": "186,000",
|
|
"correct": false,
|
|
"inputTokens": 1561,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1306
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "344498",
|
|
"actual": "337,045",
|
|
"correct": false,
|
|
"inputTokens": 1506,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1292
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "344498",
|
|
"actual": "188,000",
|
|
"correct": false,
|
|
"inputTokens": 1439,
|
|
"outputTokens": 4,
|
|
"latencyMs": 2659
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "344498",
|
|
"actual": "372,915",
|
|
"correct": false,
|
|
"inputTokens": 1442,
|
|
"outputTokens": 7,
|
|
"latencyMs": 966
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "344498",
|
|
"actual": "174,000",
|
|
"correct": false,
|
|
"inputTokens": 3827,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1177
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "344498",
|
|
"actual": "188,647",
|
|
"correct": false,
|
|
"inputTokens": 3412,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1018
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "344498",
|
|
"actual": "188,000",
|
|
"correct": false,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1659
|
|
},
|
|
{
|
|
"questionId": "q123",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "344498",
|
|
"actual": "181,854",
|
|
"correct": false,
|
|
"inputTokens": 3107,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1894
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "312818.50",
|
|
"actual": "188,174.36",
|
|
"correct": false,
|
|
"inputTokens": 3708,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2900
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "312818.50",
|
|
"actual": "287,745.89",
|
|
"correct": false,
|
|
"inputTokens": 4075,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1196
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "312818.50",
|
|
"actual": "Total revenue across all dates is 139,155.36.",
|
|
"correct": false,
|
|
"inputTokens": 1559,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1401
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "312818.50",
|
|
"actual": "487,891.45",
|
|
"correct": false,
|
|
"inputTokens": 1504,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1118
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "312818.50",
|
|
"actual": "Total revenue across all dates is 155,000.00.",
|
|
"correct": false,
|
|
"inputTokens": 1437,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1308
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "312818.50",
|
|
"actual": "487,891.89",
|
|
"correct": false,
|
|
"inputTokens": 1440,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1120
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "312818.50",
|
|
"actual": "Total revenue across all dates is 155,155.36.",
|
|
"correct": false,
|
|
"inputTokens": 3825,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1143
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "312818.50",
|
|
"actual": "381,968.89",
|
|
"correct": false,
|
|
"inputTokens": 3410,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1172
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "312818.50",
|
|
"actual": "Total revenue across all dates is 155,155.36.",
|
|
"correct": false,
|
|
"inputTokens": 2981,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1179
|
|
},
|
|
{
|
|
"questionId": "q124",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "312818.50",
|
|
"actual": "381,847.89",
|
|
"correct": false,
|
|
"inputTokens": 3105,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1073
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1811",
|
|
"actual": "1030",
|
|
"correct": false,
|
|
"inputTokens": 3710,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3823
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1811",
|
|
"actual": "1,234",
|
|
"correct": false,
|
|
"inputTokens": 4078,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1153
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1811",
|
|
"actual": "1040",
|
|
"correct": false,
|
|
"inputTokens": 1561,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1472
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1811",
|
|
"actual": "1,945",
|
|
"correct": false,
|
|
"inputTokens": 1507,
|
|
"outputTokens": 7,
|
|
"latencyMs": 940
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1811",
|
|
"actual": "1030",
|
|
"correct": false,
|
|
"inputTokens": 1439,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1067
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1811",
|
|
"actual": "1,945",
|
|
"correct": false,
|
|
"inputTokens": 1443,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1183
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1811",
|
|
"actual": "Total conversions: 1030",
|
|
"correct": false,
|
|
"inputTokens": 3827,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1103
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1811",
|
|
"actual": "1,454",
|
|
"correct": false,
|
|
"inputTokens": 3413,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1067
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "1811",
|
|
"actual": "1040",
|
|
"correct": false,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 3,
|
|
"latencyMs": 932
|
|
},
|
|
{
|
|
"questionId": "q125",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "1811",
|
|
"actual": "1,454",
|
|
"correct": false,
|
|
"inputTokens": 3108,
|
|
"outputTokens": 7,
|
|
"latencyMs": 1530
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42",
|
|
"actual": "42",
|
|
"correct": true,
|
|
"inputTokens": 3710,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1016
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42",
|
|
"actual": "42",
|
|
"correct": true,
|
|
"inputTokens": 4078,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1440
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 1561,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1206
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42",
|
|
"actual": "42",
|
|
"correct": true,
|
|
"inputTokens": 1507,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1452
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 1439,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1249
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42",
|
|
"actual": "42",
|
|
"correct": true,
|
|
"inputTokens": 1443,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1248
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 3827,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1420
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 3413,
|
|
"outputTokens": 5,
|
|
"latencyMs": 900
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "42",
|
|
"actual": "42",
|
|
"correct": true,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1309
|
|
},
|
|
{
|
|
"questionId": "q126",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "42",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 3108,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1216
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "28",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 3710,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3911
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "28",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 4078,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1056
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "28",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 1561,
|
|
"outputTokens": 2,
|
|
"latencyMs": 839
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "28",
|
|
"actual": "26",
|
|
"correct": false,
|
|
"inputTokens": 1507,
|
|
"outputTokens": 5,
|
|
"latencyMs": 965
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "28",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 1439,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2163
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "28",
|
|
"actual": "23",
|
|
"correct": false,
|
|
"inputTokens": 1443,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1006
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "28",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 3827,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2619
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "28",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 3413,
|
|
"outputTokens": 5,
|
|
"latencyMs": 989
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "28",
|
|
"actual": "22",
|
|
"correct": false,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1830
|
|
},
|
|
{
|
|
"questionId": "q127",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "28",
|
|
"actual": "23",
|
|
"correct": false,
|
|
"inputTokens": 3108,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1001
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 3710,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1217
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "11",
|
|
"correct": true,
|
|
"inputTokens": 4078,
|
|
"outputTokens": 5,
|
|
"latencyMs": 3180
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 1561,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1076
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 1507,
|
|
"outputTokens": 5,
|
|
"latencyMs": 912
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 1439,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2900
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "11",
|
|
"correct": true,
|
|
"inputTokens": 1443,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1389
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 3827,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1107
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "11",
|
|
"correct": true,
|
|
"inputTokens": 3413,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1150
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 2983,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1047
|
|
},
|
|
{
|
|
"questionId": "q128",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11",
|
|
"actual": "11",
|
|
"correct": true,
|
|
"inputTokens": 3108,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1169
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58",
|
|
"actual": "36",
|
|
"correct": false,
|
|
"inputTokens": 3709,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1007
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58",
|
|
"actual": "50",
|
|
"correct": false,
|
|
"inputTokens": 4078,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1342
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 1560,
|
|
"outputTokens": 2,
|
|
"latencyMs": 828
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 1507,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1305
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 1438,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1305
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 1443,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1406
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 3826,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1513
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 3413,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1026
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1373
|
|
},
|
|
{
|
|
"questionId": "q129",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58",
|
|
"actual": "54",
|
|
"correct": false,
|
|
"inputTokens": 3108,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1112
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 3709,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1248
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "31",
|
|
"correct": false,
|
|
"inputTokens": 4078,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1083
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "24",
|
|
"correct": false,
|
|
"inputTokens": 1560,
|
|
"outputTokens": 2,
|
|
"latencyMs": 895
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 1507,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1087
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 1438,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1157
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 1443,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1155
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 3826,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1959
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "31",
|
|
"correct": false,
|
|
"inputTokens": 3413,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1110
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "41",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 2,
|
|
"latencyMs": 4540
|
|
},
|
|
{
|
|
"questionId": "q130",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "41",
|
|
"actual": "31",
|
|
"correct": false,
|
|
"inputTokens": 3108,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1286
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "23",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 3709,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1059
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "23",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 4078,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1302
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "23",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 1560,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1019
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "23",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 1507,
|
|
"outputTokens": 5,
|
|
"latencyMs": 975
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "23",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 1438,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1056
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "23",
|
|
"actual": "20",
|
|
"correct": false,
|
|
"inputTokens": 1443,
|
|
"outputTokens": 5,
|
|
"latencyMs": 984
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "23",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 3826,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1420
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "23",
|
|
"actual": "21",
|
|
"correct": false,
|
|
"inputTokens": 3413,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1139
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "23",
|
|
"actual": "18",
|
|
"correct": false,
|
|
"inputTokens": 2982,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1097
|
|
},
|
|
{
|
|
"questionId": "q131",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "23",
|
|
"actual": "21",
|
|
"correct": false,
|
|
"inputTokens": 3108,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1203
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 15188,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2257
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 17409,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1292
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 8789,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1877
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 9279,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1118
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 8557,
|
|
"outputTokens": 3,
|
|
"latencyMs": 4023
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 9125,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1134
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 15482,
|
|
"outputTokens": 3,
|
|
"latencyMs": 5304
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 15367,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1442
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 13172,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2157
|
|
},
|
|
{
|
|
"questionId": "q132",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "430828",
|
|
"actual": "430828",
|
|
"correct": true,
|
|
"inputTokens": 14483,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1483
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 15190,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2084
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 17410,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2592
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 8791,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1208
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 9280,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1261
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 8559,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1697
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 9126,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1171
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 15484,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1704
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 15368,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1637
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 13174,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1599
|
|
},
|
|
{
|
|
"questionId": "q133",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11798",
|
|
"actual": "11798",
|
|
"correct": true,
|
|
"inputTokens": 14484,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1505
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 15193,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2340
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 17412,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1380
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 8794,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1631
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 9282,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1271
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 8562,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1620
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 9128,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1279
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 15487,
|
|
"outputTokens": 3,
|
|
"latencyMs": 14565
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 15370,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1559
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 13177,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1600
|
|
},
|
|
{
|
|
"questionId": "q134",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "183631",
|
|
"actual": "183631",
|
|
"correct": true,
|
|
"inputTokens": 14486,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1179
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 15192,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2508
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 17412,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1359
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 8793,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1188
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 9282,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1204
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 8561,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2448
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 9128,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 15486,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2442
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 15370,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1414
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 13176,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2254
|
|
},
|
|
{
|
|
"questionId": "q135",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "29246",
|
|
"actual": "29246",
|
|
"correct": true,
|
|
"inputTokens": 14486,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1512
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 15188,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1565
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 17407,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1871
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 8789,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1963
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 9277,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1533
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 8557,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1561
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 9123,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1200
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 15482,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1657
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 15365,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1582
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 13172,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3402
|
|
},
|
|
{
|
|
"questionId": "q136",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "135306",
|
|
"actual": "135306",
|
|
"correct": true,
|
|
"inputTokens": 14481,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1251
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "24914",
|
|
"actual": "24914",
|
|
"correct": true,
|
|
"inputTokens": 15187,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2019
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "24914",
|
|
"actual": "24914",
|
|
"correct": true,
|
|
"inputTokens": 17408,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1517
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "24914",
|
|
"actual": "The repository undefined/react-native does not exist in the provided data.",
|
|
"correct": false,
|
|
"inputTokens": 8788,
|
|
"outputTokens": 14,
|
|
"latencyMs": 1737
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "24914",
|
|
"actual": "24914",
|
|
"correct": true,
|
|
"inputTokens": 9278,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1467
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "24914",
|
|
"actual": "24914",
|
|
"correct": true,
|
|
"inputTokens": 8556,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3442
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "24914",
|
|
"actual": "24914",
|
|
"correct": true,
|
|
"inputTokens": 9124,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1300
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "24914",
|
|
"actual": "24914",
|
|
"correct": true,
|
|
"inputTokens": 15481,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1825
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "24914",
|
|
"actual": "24914",
|
|
"correct": true,
|
|
"inputTokens": 15366,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1443
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "24914",
|
|
"actual": "124320",
|
|
"correct": false,
|
|
"inputTokens": 13171,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1783
|
|
},
|
|
{
|
|
"questionId": "q137",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "24914",
|
|
"actual": "24914",
|
|
"correct": true,
|
|
"inputTokens": 14482,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1362
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 15187,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1824
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 17407,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1479
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111683",
|
|
"actual": "108017",
|
|
"correct": false,
|
|
"inputTokens": 8788,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3315
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 9277,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1270
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 8556,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1384
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 9123,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1252
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 15481,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3048
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 15365,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1381
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 13171,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3804
|
|
},
|
|
{
|
|
"questionId": "q138",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "111683",
|
|
"actual": "111683",
|
|
"correct": true,
|
|
"inputTokens": 14481,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1498
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 15194,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1726
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 17412,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1526
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 8795,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1685
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 9282,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1140
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "13364",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 8563,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1933
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 9128,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1157
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 15488,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1249
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 15370,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1347
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 13178,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2174
|
|
},
|
|
{
|
|
"questionId": "q139",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "13364",
|
|
"actual": "13364",
|
|
"correct": true,
|
|
"inputTokens": 14486,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1197
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "98464",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 15186,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3252
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "98464",
|
|
"actual": "98464",
|
|
"correct": true,
|
|
"inputTokens": 17405,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1667
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "98464",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 8787,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1192
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "98464",
|
|
"actual": "98464",
|
|
"correct": true,
|
|
"inputTokens": 9275,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1113
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "98464",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 8555,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2198
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "98464",
|
|
"actual": "98464",
|
|
"correct": true,
|
|
"inputTokens": 9121,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1187
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "98464",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 15480,
|
|
"outputTokens": 2,
|
|
"latencyMs": 8573
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "98464",
|
|
"actual": "98464",
|
|
"correct": true,
|
|
"inputTokens": 15363,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "98464",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 13170,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3471
|
|
},
|
|
{
|
|
"questionId": "q140",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "98464",
|
|
"actual": "98464",
|
|
"correct": true,
|
|
"inputTokens": 14479,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1457
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6378",
|
|
"actual": "6378",
|
|
"correct": true,
|
|
"inputTokens": 15188,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1363
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6378",
|
|
"actual": "6378",
|
|
"correct": true,
|
|
"inputTokens": 17408,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1803
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6378",
|
|
"actual": "6378",
|
|
"correct": true,
|
|
"inputTokens": 8789,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3696
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6378",
|
|
"actual": "6378",
|
|
"correct": true,
|
|
"inputTokens": 9278,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1391
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6378",
|
|
"actual": "93731",
|
|
"correct": false,
|
|
"inputTokens": 8557,
|
|
"outputTokens": 3,
|
|
"latencyMs": 7861
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6378",
|
|
"actual": "6378",
|
|
"correct": true,
|
|
"inputTokens": 9124,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1420
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6378",
|
|
"actual": "6378",
|
|
"correct": true,
|
|
"inputTokens": 15482,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1769
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6378",
|
|
"actual": "6378",
|
|
"correct": true,
|
|
"inputTokens": 15366,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1233
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "6378",
|
|
"actual": "93731",
|
|
"correct": false,
|
|
"inputTokens": 13172,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1831
|
|
},
|
|
{
|
|
"questionId": "q141",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "6378",
|
|
"actual": "6378",
|
|
"correct": true,
|
|
"inputTokens": 14482,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1507
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 15190,
|
|
"outputTokens": 3,
|
|
"latencyMs": 10752
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 17409,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1672
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 8791,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1788
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 9279,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1633
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 8559,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1365
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 9125,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1242
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 15484,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2237
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 15367,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1275
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 13174,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3028
|
|
},
|
|
{
|
|
"questionId": "q142",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "254916",
|
|
"actual": "254916",
|
|
"correct": true,
|
|
"inputTokens": 14483,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1615
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 15188,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1972
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 17410,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2308
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 8789,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1361
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 9280,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1162
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 8557,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2196
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 9126,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1199
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 15482,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1758
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 15368,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1340
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 13172,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2122
|
|
},
|
|
{
|
|
"questionId": "q143",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "32413",
|
|
"actual": "32413",
|
|
"correct": true,
|
|
"inputTokens": 14484,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1156
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "240059",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 15186,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1208
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "240059",
|
|
"actual": "240059",
|
|
"correct": true,
|
|
"inputTokens": 17405,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1826
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "240059",
|
|
"actual": "undefined",
|
|
"correct": false,
|
|
"inputTokens": 8787,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2224
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "240059",
|
|
"actual": "240059",
|
|
"correct": true,
|
|
"inputTokens": 9275,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1220
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "240059",
|
|
"actual": "undefined",
|
|
"correct": false,
|
|
"inputTokens": 8555,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1199
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "240059",
|
|
"actual": "240059",
|
|
"correct": true,
|
|
"inputTokens": 9121,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1264
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "240059",
|
|
"actual": "undefined/react does not exist in the provided data.",
|
|
"correct": false,
|
|
"inputTokens": 15480,
|
|
"outputTokens": 11,
|
|
"latencyMs": 3072
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "240059",
|
|
"actual": "240059",
|
|
"correct": true,
|
|
"inputTokens": 15363,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1609
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "240059",
|
|
"actual": "undefined/react does not exist in the provided data.",
|
|
"correct": false,
|
|
"inputTokens": 13170,
|
|
"outputTokens": 11,
|
|
"latencyMs": 2608
|
|
},
|
|
{
|
|
"questionId": "q144",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "240059",
|
|
"actual": "240059",
|
|
"correct": true,
|
|
"inputTokens": 14479,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1237
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "48986",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 15187,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1906
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "48986",
|
|
"actual": "48986",
|
|
"correct": true,
|
|
"inputTokens": 17406,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1399
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "48986",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 8788,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2026
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "48986",
|
|
"actual": "48986",
|
|
"correct": true,
|
|
"inputTokens": 9276,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1318
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "48986",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 8556,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1605
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "48986",
|
|
"actual": "48986",
|
|
"correct": true,
|
|
"inputTokens": 9122,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1270
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "48986",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 15481,
|
|
"outputTokens": 2,
|
|
"latencyMs": 5367
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "48986",
|
|
"actual": "48986",
|
|
"correct": true,
|
|
"inputTokens": 15364,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1204
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "48986",
|
|
"actual": "The repository \"undefined/Python\" does not exist in the provided data.",
|
|
"correct": false,
|
|
"inputTokens": 13171,
|
|
"outputTokens": 16,
|
|
"latencyMs": 6329
|
|
},
|
|
{
|
|
"questionId": "q145",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "48986",
|
|
"actual": "48986",
|
|
"correct": true,
|
|
"inputTokens": 14480,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1369
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 15186,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2063
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 17405,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1470
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 8787,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1386
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 9275,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1104
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 8555,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1747
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 9121,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1300
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 15480,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1443
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 15363,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1282
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 13170,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2185
|
|
},
|
|
{
|
|
"questionId": "q146",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "209624",
|
|
"actual": "209624",
|
|
"correct": true,
|
|
"inputTokens": 14479,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1407
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58023",
|
|
"actual": "58023",
|
|
"correct": true,
|
|
"inputTokens": 15186,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1743
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58023",
|
|
"actual": "58023",
|
|
"correct": true,
|
|
"inputTokens": 17406,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1564
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58023",
|
|
"actual": "58023",
|
|
"correct": true,
|
|
"inputTokens": 8787,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1317
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58023",
|
|
"actual": "58023",
|
|
"correct": true,
|
|
"inputTokens": 9276,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1258
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58023",
|
|
"actual": "58023",
|
|
"correct": true,
|
|
"inputTokens": 8555,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2419
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58023",
|
|
"actual": "58023",
|
|
"correct": true,
|
|
"inputTokens": 9122,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1171
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58023",
|
|
"actual": "undefined/linux does not exist in the provided data.",
|
|
"correct": false,
|
|
"inputTokens": 15480,
|
|
"outputTokens": 11,
|
|
"latencyMs": 1680
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58023",
|
|
"actual": "58023",
|
|
"correct": true,
|
|
"inputTokens": 15364,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1396
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "58023",
|
|
"actual": "The repository \"undefined/linux\" does not exist in the provided data.",
|
|
"correct": false,
|
|
"inputTokens": 13170,
|
|
"outputTokens": 15,
|
|
"latencyMs": 1418
|
|
},
|
|
{
|
|
"questionId": "q147",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "58023",
|
|
"actual": "58023",
|
|
"correct": true,
|
|
"inputTokens": 14480,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1399
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1673
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 17407,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1736
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1754
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 9277,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1317
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "196024",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3219
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 9123,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1311
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1346
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 15365,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1560
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1009
|
|
},
|
|
{
|
|
"questionId": "q148",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "196024",
|
|
"actual": "196024",
|
|
"correct": true,
|
|
"inputTokens": 14481,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1446
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3361
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 17408,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1788
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1123
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 9278,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1235
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1100
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 9124,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1188
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1557
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 15366,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1352
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1280
|
|
},
|
|
{
|
|
"questionId": "q149",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "30919",
|
|
"actual": "30919",
|
|
"correct": true,
|
|
"inputTokens": 14482,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1247
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 15188,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1394
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 17405,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1801
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 8789,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2052
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 9275,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1176
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 8557,
|
|
"outputTokens": 3,
|
|
"latencyMs": 2084
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 9121,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1191
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 15482,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1261
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 15363,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1355
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 13172,
|
|
"outputTokens": 3,
|
|
"latencyMs": 3388
|
|
},
|
|
{
|
|
"questionId": "q150",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "192220",
|
|
"actual": "192220",
|
|
"correct": true,
|
|
"inputTokens": 14479,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1591
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 15191,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1942
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 17414,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1340
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 8792,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1443
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 9284,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1732
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 8560,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1994
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 9130,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1198
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 15485,
|
|
"outputTokens": 3,
|
|
"latencyMs": 5013
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 15372,
|
|
"outputTokens": 6,
|
|
"latencyMs": 1463
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 13175,
|
|
"outputTokens": 3,
|
|
"latencyMs": 1296
|
|
},
|
|
{
|
|
"questionId": "q151",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "11763",
|
|
"actual": "11763",
|
|
"correct": true,
|
|
"inputTokens": 14488,
|
|
"outputTokens": 6,
|
|
"latencyMs": 2877
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 15188,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2160
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 17406,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1947
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 8789,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1222
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 9276,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1487
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 8557,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1450
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 9122,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1358
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 15482,
|
|
"outputTokens": 2,
|
|
"latencyMs": 873
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "100",
|
|
"correct": true,
|
|
"inputTokens": 15364,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1500
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 13172,
|
|
"outputTokens": 2,
|
|
"latencyMs": 7031
|
|
},
|
|
{
|
|
"questionId": "q152",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "0",
|
|
"correct": false,
|
|
"inputTokens": 14480,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1916
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "15404143",
|
|
"actual": "43115556",
|
|
"correct": false,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 4,
|
|
"latencyMs": 3324
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "15404143",
|
|
"actual": "13,847,892",
|
|
"correct": false,
|
|
"inputTokens": 17407,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1607
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "15404143",
|
|
"actual": "10419582",
|
|
"correct": false,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 4,
|
|
"latencyMs": 900
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "15404143",
|
|
"actual": "13,847,892",
|
|
"correct": false,
|
|
"inputTokens": 9277,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1385
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "15404143",
|
|
"actual": "10419582",
|
|
"correct": false,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1922
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "15404143",
|
|
"actual": "15,847,892",
|
|
"correct": false,
|
|
"inputTokens": 9123,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1230
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "15404143",
|
|
"actual": "10419580",
|
|
"correct": false,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 4,
|
|
"latencyMs": 1716
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "15404143",
|
|
"actual": "13,847,892",
|
|
"correct": false,
|
|
"inputTokens": 15365,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1384
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "15404143",
|
|
"actual": "Total number of stars across all repositories is 4,978,155.",
|
|
"correct": false,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 16,
|
|
"latencyMs": 3411
|
|
},
|
|
{
|
|
"questionId": "q153",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "15404143",
|
|
"actual": "13,847,892",
|
|
"correct": false,
|
|
"inputTokens": 14481,
|
|
"outputTokens": 9,
|
|
"latencyMs": 1539
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "77",
|
|
"correct": false,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2523
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "100",
|
|
"correct": true,
|
|
"inputTokens": 17408,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1885
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1148
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "100",
|
|
"correct": true,
|
|
"inputTokens": 9278,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1378
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1364
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "100",
|
|
"correct": true,
|
|
"inputTokens": 9124,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1125
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "104",
|
|
"correct": false,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1276
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "100",
|
|
"correct": true,
|
|
"inputTokens": 15366,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1331
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "77",
|
|
"correct": false,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1534
|
|
},
|
|
{
|
|
"questionId": "q154",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "100",
|
|
"correct": true,
|
|
"inputTokens": 14482,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1282
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "19",
|
|
"correct": false,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2206
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "71",
|
|
"correct": false,
|
|
"inputTokens": 17408,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1568
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "15",
|
|
"correct": false,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1478
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 9278,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1314
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "12",
|
|
"correct": false,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2149
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 9124,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1485
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1043
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "71",
|
|
"correct": false,
|
|
"inputTokens": 15366,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1371
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1693
|
|
},
|
|
{
|
|
"questionId": "q155",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "71",
|
|
"correct": false,
|
|
"inputTokens": 14482,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1237
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "76",
|
|
"actual": "82",
|
|
"correct": false,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 2,
|
|
"latencyMs": 927
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "76",
|
|
"actual": "100",
|
|
"correct": false,
|
|
"inputTokens": 17408,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1274
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "76",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2541
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "76",
|
|
"actual": "100",
|
|
"correct": false,
|
|
"inputTokens": 9278,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1116
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "76",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 2,
|
|
"latencyMs": 997
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "76",
|
|
"actual": "100",
|
|
"correct": false,
|
|
"inputTokens": 9124,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1513
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "76",
|
|
"actual": "104",
|
|
"correct": false,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3168
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "76",
|
|
"actual": "100",
|
|
"correct": false,
|
|
"inputTokens": 15366,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1498
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "76",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1600
|
|
},
|
|
{
|
|
"questionId": "q156",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "76",
|
|
"actual": "100",
|
|
"correct": false,
|
|
"inputTokens": 14482,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1519
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "77",
|
|
"correct": false,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1809
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "89",
|
|
"correct": false,
|
|
"inputTokens": 17409,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1409
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1367
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "73",
|
|
"correct": false,
|
|
"inputTokens": 9279,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1296
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1162
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "89",
|
|
"correct": false,
|
|
"inputTokens": 9125,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1435
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "77",
|
|
"correct": false,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1774
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "95",
|
|
"correct": false,
|
|
"inputTokens": 15367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1479
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "100",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2710
|
|
},
|
|
{
|
|
"questionId": "q157",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "100",
|
|
"actual": "95",
|
|
"correct": false,
|
|
"inputTokens": 14483,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1272
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "95",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 2,
|
|
"latencyMs": 3038
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "95",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 17409,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1562
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "95",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1536
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "95",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 9279,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1216
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "95",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1760
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "95",
|
|
"actual": "42",
|
|
"correct": false,
|
|
"inputTokens": 9125,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1255
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "95",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1683
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "95",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 15367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 2256
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "95",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2831
|
|
},
|
|
{
|
|
"questionId": "q158",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "95",
|
|
"actual": "47",
|
|
"correct": false,
|
|
"inputTokens": 14483,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1980
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "json",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "83",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 15189,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1327
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "json",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "83",
|
|
"actual": "71",
|
|
"correct": false,
|
|
"inputTokens": 17409,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1894
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "toon",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "83",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 8790,
|
|
"outputTokens": 2,
|
|
"latencyMs": 784
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "toon",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "83",
|
|
"actual": "73",
|
|
"correct": false,
|
|
"inputTokens": 9279,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1422
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "csv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "83",
|
|
"actual": "34",
|
|
"correct": false,
|
|
"inputTokens": 8558,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2644
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "csv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "83",
|
|
"actual": "73",
|
|
"correct": false,
|
|
"inputTokens": 9125,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1109
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "markdown-kv",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "83",
|
|
"actual": "66",
|
|
"correct": false,
|
|
"inputTokens": 15483,
|
|
"outputTokens": 2,
|
|
"latencyMs": 1826
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "markdown-kv",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "83",
|
|
"actual": "71",
|
|
"correct": false,
|
|
"inputTokens": 15367,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1342
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "yaml",
|
|
"model": "gpt-4o-mini",
|
|
"expected": "83",
|
|
"actual": "38",
|
|
"correct": false,
|
|
"inputTokens": 13173,
|
|
"outputTokens": 2,
|
|
"latencyMs": 2055
|
|
},
|
|
{
|
|
"questionId": "q159",
|
|
"format": "yaml",
|
|
"model": "claude-haiku-4-5",
|
|
"expected": "83",
|
|
"actual": "71",
|
|
"correct": false,
|
|
"inputTokens": 14483,
|
|
"outputTokens": 5,
|
|
"latencyMs": 1537
|
|
}
|
|
]
|