[ { "questionId": "q1", "format": "json", "model": "gpt-4o-mini", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 6391, "outputTokens": 3, "latencyMs": 1313 }, { "questionId": "q1", "format": "json", "model": "claude-haiku-4-5", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 7870, "outputTokens": 6, "latencyMs": 1346 }, { "questionId": "q1", "format": "toon", "model": "gpt-4o-mini", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 2528, "outputTokens": 3, "latencyMs": 1191 }, { "questionId": "q1", "format": "toon", "model": "claude-haiku-4-5", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 2982, "outputTokens": 6, "latencyMs": 1399 }, { "questionId": "q1", "format": "csv", "model": "gpt-4o-mini", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 2382, "outputTokens": 3, "latencyMs": 5010 }, { "questionId": "q1", "format": "csv", "model": "claude-haiku-4-5", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 2856, "outputTokens": 6, "latencyMs": 1472 }, { "questionId": "q1", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 6317, "outputTokens": 3, "latencyMs": 1667 }, { "questionId": "q1", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 6365, "outputTokens": 6, "latencyMs": 1507 }, { "questionId": "q1", "format": "yaml", "model": "gpt-4o-mini", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 5013, "outputTokens": 3, "latencyMs": 1325 }, { "questionId": "q1", "format": "yaml", "model": "claude-haiku-4-5", "expected": "56176", "actual": "56176", "correct": true, "inputTokens": 5760, "outputTokens": 6, "latencyMs": 2280 }, { "questionId": "q2", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6391, "outputTokens": 2, "latencyMs": 3167 }, { "questionId": "q2", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7869, "outputTokens": 4, "latencyMs": 1267 }, { "questionId": "q2", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2528, "outputTokens": 2, "latencyMs": 1402 }, { "questionId": "q2", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2981, "outputTokens": 4, "latencyMs": 1290 }, { "questionId": "q2", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2382, "outputTokens": 2, "latencyMs": 5070 }, { "questionId": "q2", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2855, "outputTokens": 4, "latencyMs": 1320 }, { "questionId": "q2", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6317, "outputTokens": 2, "latencyMs": 1745 }, { "questionId": "q2", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6364, "outputTokens": 4, "latencyMs": 1191 }, { "questionId": "q2", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5013, "outputTokens": 2, "latencyMs": 2713 }, { "questionId": "q2", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5759, "outputTokens": 4, "latencyMs": 1309 }, { "questionId": "q3", "format": "json", "model": "gpt-4o-mini", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 6393, "outputTokens": 7, "latencyMs": 1160 }, { "questionId": "q3", "format": "json", "model": "claude-haiku-4-5", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 7874, "outputTokens": 12, "latencyMs": 1338 }, { "questionId": "q3", "format": "toon", "model": "gpt-4o-mini", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 2530, "outputTokens": 7, "latencyMs": 1478 }, { "questionId": "q3", "format": "toon", "model": "claude-haiku-4-5", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 2986, "outputTokens": 12, "latencyMs": 1563 }, { "questionId": "q3", "format": "csv", "model": "gpt-4o-mini", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 2384, "outputTokens": 7, "latencyMs": 1310 }, { "questionId": "q3", "format": "csv", "model": "claude-haiku-4-5", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 2860, "outputTokens": 12, "latencyMs": 1236 }, { "questionId": "q3", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 6319, "outputTokens": 7, "latencyMs": 2236 }, { "questionId": "q3", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 6369, "outputTokens": 12, "latencyMs": 1253 }, { "questionId": "q3", "format": "yaml", "model": "gpt-4o-mini", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 5015, "outputTokens": 7, "latencyMs": 1917 }, { "questionId": "q3", "format": "yaml", "model": "claude-haiku-4-5", "expected": "lorenza.kunze@yahoo.com", "actual": "lorenza.kunze@yahoo.com", "correct": true, "inputTokens": 5764, "outputTokens": 12, "latencyMs": 1332 }, { "questionId": "q4", "format": "json", "model": "gpt-4o-mini", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 6391, "outputTokens": 3, "latencyMs": 2945 }, { "questionId": "q4", "format": "json", "model": "claude-haiku-4-5", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 7870, "outputTokens": 6, "latencyMs": 1773 }, { "questionId": "q4", "format": "toon", "model": "gpt-4o-mini", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 2528, "outputTokens": 3, "latencyMs": 1294 }, { "questionId": "q4", "format": "toon", "model": "claude-haiku-4-5", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 2982, "outputTokens": 6, "latencyMs": 980 }, { "questionId": "q4", "format": "csv", "model": "gpt-4o-mini", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 2382, "outputTokens": 3, "latencyMs": 1747 }, { "questionId": "q4", "format": "csv", "model": "claude-haiku-4-5", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 2856, "outputTokens": 6, "latencyMs": 1197 }, { "questionId": "q4", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 6317, "outputTokens": 3, "latencyMs": 1039 }, { "questionId": "q4", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 6365, "outputTokens": 6, "latencyMs": 1453 }, { "questionId": "q4", "format": "yaml", "model": "gpt-4o-mini", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 5013, "outputTokens": 3, "latencyMs": 1056 }, { "questionId": "q4", "format": "yaml", "model": "claude-haiku-4-5", "expected": "117381", "actual": "117381", "correct": true, "inputTokens": 5760, "outputTokens": 6, "latencyMs": 1564 }, { "questionId": "q5", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6390, "outputTokens": 2, "latencyMs": 1263 }, { "questionId": "q5", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7868, "outputTokens": 4, "latencyMs": 1097 }, { "questionId": "q5", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2527, "outputTokens": 2, "latencyMs": 1248 }, { "questionId": "q5", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2980, "outputTokens": 4, "latencyMs": 1486 }, { "questionId": "q5", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2381, "outputTokens": 2, "latencyMs": 1311 }, { "questionId": "q5", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2854, "outputTokens": 4, "latencyMs": 1019 }, { "questionId": "q5", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6316, "outputTokens": 2, "latencyMs": 1287 }, { "questionId": "q5", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6363, "outputTokens": 4, "latencyMs": 1243 }, { "questionId": "q5", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5012, "outputTokens": 2, "latencyMs": 1339 }, { "questionId": "q5", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5758, "outputTokens": 4, "latencyMs": 1621 }, { "questionId": "q6", "format": "json", "model": "gpt-4o-mini", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 6391, "outputTokens": 6, "latencyMs": 1625 }, { "questionId": "q6", "format": "json", "model": "claude-haiku-4-5", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 7871, "outputTokens": 11, "latencyMs": 1328 }, { "questionId": "q6", "format": "toon", "model": "gpt-4o-mini", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 2528, "outputTokens": 6, "latencyMs": 1463 }, { "questionId": "q6", "format": "toon", "model": "claude-haiku-4-5", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 2983, "outputTokens": 11, "latencyMs": 1149 }, { "questionId": "q6", "format": "csv", "model": "gpt-4o-mini", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 2382, "outputTokens": 6, "latencyMs": 1474 }, { "questionId": "q6", "format": "csv", "model": "claude-haiku-4-5", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 2857, "outputTokens": 11, "latencyMs": 977 }, { "questionId": "q6", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 6317, "outputTokens": 6, "latencyMs": 2079 }, { "questionId": "q6", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 6366, "outputTokens": 11, "latencyMs": 1134 }, { "questionId": "q6", "format": "yaml", "model": "gpt-4o-mini", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 5013, "outputTokens": 6, "latencyMs": 1124 }, { "questionId": "q6", "format": "yaml", "model": "claude-haiku-4-5", "expected": "jayda60@hotmail.com", "actual": "jayda60@hotmail.com", "correct": true, "inputTokens": 5761, "outputTokens": 11, "latencyMs": 1053 }, { "questionId": "q7", "format": "json", "model": "gpt-4o-mini", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 6391, "outputTokens": 3, "latencyMs": 1427 }, { "questionId": "q7", "format": "json", "model": "claude-haiku-4-5", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 7870, "outputTokens": 6, "latencyMs": 1246 }, { "questionId": "q7", "format": "toon", "model": "gpt-4o-mini", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 2528, "outputTokens": 3, "latencyMs": 1171 }, { "questionId": "q7", "format": "toon", "model": "claude-haiku-4-5", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 2982, "outputTokens": 6, "latencyMs": 1547 }, { "questionId": "q7", "format": "csv", "model": "gpt-4o-mini", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 2382, "outputTokens": 3, "latencyMs": 1523 }, { "questionId": "q7", "format": "csv", "model": "claude-haiku-4-5", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 2856, "outputTokens": 6, "latencyMs": 1148 }, { "questionId": "q7", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 6317, "outputTokens": 3, "latencyMs": 1360 }, { "questionId": "q7", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 6365, "outputTokens": 6, "latencyMs": 1100 }, { "questionId": "q7", "format": "yaml", "model": "gpt-4o-mini", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 5013, "outputTokens": 3, "latencyMs": 1116 }, { "questionId": "q7", "format": "yaml", "model": "claude-haiku-4-5", "expected": "92971", "actual": "92971", "correct": true, "inputTokens": 5760, "outputTokens": 6, "latencyMs": 1202 }, { "questionId": "q8", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Operations", "correct": false, "inputTokens": 6391, "outputTokens": 2, "latencyMs": 974 }, { "questionId": "q8", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7871, "outputTokens": 4, "latencyMs": 1357 }, { "questionId": "q8", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2528, "outputTokens": 2, "latencyMs": 1107 }, { "questionId": "q8", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2983, "outputTokens": 4, "latencyMs": 1126 }, { "questionId": "q8", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2382, "outputTokens": 2, "latencyMs": 1124 }, { "questionId": "q8", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2857, "outputTokens": 4, "latencyMs": 1208 }, { "questionId": "q8", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Operations", "correct": false, "inputTokens": 6317, "outputTokens": 2, "latencyMs": 1463 }, { "questionId": "q8", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6366, "outputTokens": 4, "latencyMs": 1175 }, { "questionId": "q8", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5013, "outputTokens": 2, "latencyMs": 1952 }, { "questionId": "q8", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5761, "outputTokens": 4, "latencyMs": 1271 }, { "questionId": "q9", "format": "json", "model": "gpt-4o-mini", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 6393, "outputTokens": 7, "latencyMs": 1301 }, { "questionId": "q9", "format": "json", "model": "claude-haiku-4-5", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 7871, "outputTokens": 11, "latencyMs": 1371 }, { "questionId": "q9", "format": "toon", "model": "gpt-4o-mini", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 2530, "outputTokens": 7, "latencyMs": 1197 }, { "questionId": "q9", "format": "toon", "model": "claude-haiku-4-5", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 2983, "outputTokens": 11, "latencyMs": 1088 }, { "questionId": "q9", "format": "csv", "model": "gpt-4o-mini", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 2384, "outputTokens": 7, "latencyMs": 1310 }, { "questionId": "q9", "format": "csv", "model": "claude-haiku-4-5", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 2857, "outputTokens": 11, "latencyMs": 1300 }, { "questionId": "q9", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 6319, "outputTokens": 7, "latencyMs": 1531 }, { "questionId": "q9", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 6366, "outputTokens": 11, "latencyMs": 1275 }, { "questionId": "q9", "format": "yaml", "model": "gpt-4o-mini", "expected": "terrance.hansen@yahoo.com", "actual": "terrence.hansen@yahoo.com", "correct": false, "inputTokens": 5015, "outputTokens": 7, "latencyMs": 1245 }, { "questionId": "q9", "format": "yaml", "model": "claude-haiku-4-5", "expected": "terrance.hansen@yahoo.com", "actual": "terrance.hansen@yahoo.com", "correct": true, "inputTokens": 5761, "outputTokens": 11, "latencyMs": 1215 }, { "questionId": "q10", "format": "json", "model": "gpt-4o-mini", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 6392, "outputTokens": 3, "latencyMs": 4959 }, { "questionId": "q10", "format": "json", "model": "claude-haiku-4-5", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 7870, "outputTokens": 6, "latencyMs": 1269 }, { "questionId": "q10", "format": "toon", "model": "gpt-4o-mini", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 2529, "outputTokens": 3, "latencyMs": 1111 }, { "questionId": "q10", "format": "toon", "model": "claude-haiku-4-5", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 2982, "outputTokens": 6, "latencyMs": 1254 }, { "questionId": "q10", "format": "csv", "model": "gpt-4o-mini", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 2383, "outputTokens": 3, "latencyMs": 1616 }, { "questionId": "q10", "format": "csv", "model": "claude-haiku-4-5", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 2856, "outputTokens": 6, "latencyMs": 1123 }, { "questionId": "q10", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 6318, "outputTokens": 3, "latencyMs": 1201 }, { "questionId": "q10", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 6365, "outputTokens": 6, "latencyMs": 1371 }, { "questionId": "q10", "format": "yaml", "model": "gpt-4o-mini", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 5014, "outputTokens": 3, "latencyMs": 1503 }, { "questionId": "q10", "format": "yaml", "model": "claude-haiku-4-5", "expected": "107744", "actual": "107744", "correct": true, "inputTokens": 5760, "outputTokens": 6, "latencyMs": 1249 }, { "questionId": "q11", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6391, "outputTokens": 2, "latencyMs": 1383 }, { "questionId": "q11", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7869, "outputTokens": 4, "latencyMs": 1081 }, { "questionId": "q11", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2528, "outputTokens": 2, "latencyMs": 1677 }, { "questionId": "q11", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2981, "outputTokens": 4, "latencyMs": 1072 }, { "questionId": "q11", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2382, "outputTokens": 2, "latencyMs": 1142 }, { "questionId": "q11", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2855, "outputTokens": 4, "latencyMs": 991 }, { "questionId": "q11", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6317, "outputTokens": 2, "latencyMs": 1339 }, { "questionId": "q11", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6364, "outputTokens": 4, "latencyMs": 1117 }, { "questionId": "q11", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5013, "outputTokens": 2, "latencyMs": 2483 }, { "questionId": "q11", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5759, "outputTokens": 4, "latencyMs": 1187 }, { "questionId": "q12", "format": "json", "model": "gpt-4o-mini", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 6390, "outputTokens": 5, "latencyMs": 1827 }, { "questionId": "q12", "format": "json", "model": "claude-haiku-4-5", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 7867, "outputTokens": 9, "latencyMs": 1121 }, { "questionId": "q12", "format": "toon", "model": "gpt-4o-mini", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 2527, "outputTokens": 5, "latencyMs": 1373 }, { "questionId": "q12", "format": "toon", "model": "claude-haiku-4-5", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 2979, "outputTokens": 9, "latencyMs": 1284 }, { "questionId": "q12", "format": "csv", "model": "gpt-4o-mini", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 2381, "outputTokens": 5, "latencyMs": 1751 }, { "questionId": "q12", "format": "csv", "model": "claude-haiku-4-5", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 2853, "outputTokens": 9, "latencyMs": 1140 }, { "questionId": "q12", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 6316, "outputTokens": 5, "latencyMs": 1624 }, { "questionId": "q12", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 6362, "outputTokens": 9, "latencyMs": 1071 }, { "questionId": "q12", "format": "yaml", "model": "gpt-4o-mini", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 5012, "outputTokens": 5, "latencyMs": 1970 }, { "questionId": "q12", "format": "yaml", "model": "claude-haiku-4-5", "expected": "allan21@gmail.com", "actual": "allan21@gmail.com", "correct": true, "inputTokens": 5757, "outputTokens": 9, "latencyMs": 1437 }, { "questionId": "q13", "format": "json", "model": "gpt-4o-mini", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 6389, "outputTokens": 3, "latencyMs": 1263 }, { "questionId": "q13", "format": "json", "model": "claude-haiku-4-5", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 7868, "outputTokens": 6, "latencyMs": 1277 }, { "questionId": "q13", "format": "toon", "model": "gpt-4o-mini", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 2526, "outputTokens": 3, "latencyMs": 1151 }, { "questionId": "q13", "format": "toon", "model": "claude-haiku-4-5", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 2980, "outputTokens": 6, "latencyMs": 1260 }, { "questionId": "q13", "format": "csv", "model": "gpt-4o-mini", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 2380, "outputTokens": 3, "latencyMs": 1071 }, { "questionId": "q13", "format": "csv", "model": "claude-haiku-4-5", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 2854, "outputTokens": 6, "latencyMs": 891 }, { "questionId": "q13", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 6315, "outputTokens": 3, "latencyMs": 1548 }, { "questionId": "q13", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 6363, "outputTokens": 6, "latencyMs": 1456 }, { "questionId": "q13", "format": "yaml", "model": "gpt-4o-mini", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 5011, "outputTokens": 3, "latencyMs": 1268 }, { "questionId": "q13", "format": "yaml", "model": "claude-haiku-4-5", "expected": "145843", "actual": "145843", "correct": true, "inputTokens": 5758, "outputTokens": 6, "latencyMs": 1205 }, { "questionId": "q14", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6390, "outputTokens": 2, "latencyMs": 1310 }, { "questionId": "q14", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7868, "outputTokens": 4, "latencyMs": 1071 }, { "questionId": "q14", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2527, "outputTokens": 2, "latencyMs": 895 }, { "questionId": "q14", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2980, "outputTokens": 4, "latencyMs": 1020 }, { "questionId": "q14", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2381, "outputTokens": 2, "latencyMs": 1168 }, { "questionId": "q14", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2854, "outputTokens": 4, "latencyMs": 977 }, { "questionId": "q14", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Operations", "correct": false, "inputTokens": 6316, "outputTokens": 2, "latencyMs": 1370 }, { "questionId": "q14", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6363, "outputTokens": 4, "latencyMs": 1508 }, { "questionId": "q14", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5012, "outputTokens": 2, "latencyMs": 3622 }, { "questionId": "q14", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5758, "outputTokens": 4, "latencyMs": 1249 }, { "questionId": "q15", "format": "json", "model": "gpt-4o-mini", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 6391, "outputTokens": 7, "latencyMs": 3269 }, { "questionId": "q15", "format": "json", "model": "claude-haiku-4-5", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 7869, "outputTokens": 9, "latencyMs": 1538 }, { "questionId": "q15", "format": "toon", "model": "gpt-4o-mini", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 2528, "outputTokens": 7, "latencyMs": 1413 }, { "questionId": "q15", "format": "toon", "model": "claude-haiku-4-5", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 2981, "outputTokens": 9, "latencyMs": 1027 }, { "questionId": "q15", "format": "csv", "model": "gpt-4o-mini", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 2382, "outputTokens": 7, "latencyMs": 1257 }, { "questionId": "q15", "format": "csv", "model": "claude-haiku-4-5", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 2855, "outputTokens": 9, "latencyMs": 1169 }, { "questionId": "q15", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 6317, "outputTokens": 7, "latencyMs": 1464 }, { "questionId": "q15", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 6364, "outputTokens": 9, "latencyMs": 1799 }, { "questionId": "q15", "format": "yaml", "model": "gpt-4o-mini", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 5013, "outputTokens": 7, "latencyMs": 1616 }, { "questionId": "q15", "format": "yaml", "model": "claude-haiku-4-5", "expected": "alexandria61@gmail.com", "actual": "alexandria61@gmail.com", "correct": true, "inputTokens": 5759, "outputTokens": 9, "latencyMs": 1349 }, { "questionId": "q16", "format": "json", "model": "gpt-4o-mini", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 6390, "outputTokens": 3, "latencyMs": 1298 }, { "questionId": "q16", "format": "json", "model": "claude-haiku-4-5", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 7870, "outputTokens": 6, "latencyMs": 1115 }, { "questionId": "q16", "format": "toon", "model": "gpt-4o-mini", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 2527, "outputTokens": 3, "latencyMs": 1180 }, { "questionId": "q16", "format": "toon", "model": "claude-haiku-4-5", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 2982, "outputTokens": 6, "latencyMs": 1110 }, { "questionId": "q16", "format": "csv", "model": "gpt-4o-mini", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 2381, "outputTokens": 3, "latencyMs": 1235 }, { "questionId": "q16", "format": "csv", "model": "claude-haiku-4-5", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 2856, "outputTokens": 6, "latencyMs": 1228 }, { "questionId": "q16", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 6316, "outputTokens": 3, "latencyMs": 1832 }, { "questionId": "q16", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 6365, "outputTokens": 6, "latencyMs": 1401 }, { "questionId": "q16", "format": "yaml", "model": "gpt-4o-mini", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 5012, "outputTokens": 3, "latencyMs": 933 }, { "questionId": "q16", "format": "yaml", "model": "claude-haiku-4-5", "expected": "89436", "actual": "89436", "correct": true, "inputTokens": 5760, "outputTokens": 6, "latencyMs": 1570 }, { "questionId": "q17", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6393, "outputTokens": 2, "latencyMs": 1221 }, { "questionId": "q17", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7872, "outputTokens": 4, "latencyMs": 1293 }, { "questionId": "q17", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2530, "outputTokens": 2, "latencyMs": 1147 }, { "questionId": "q17", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2984, "outputTokens": 4, "latencyMs": 923 }, { "questionId": "q17", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2384, "outputTokens": 2, "latencyMs": 1180 }, { "questionId": "q17", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2858, "outputTokens": 4, "latencyMs": 1025 }, { "questionId": "q17", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6319, "outputTokens": 2, "latencyMs": 1748 }, { "questionId": "q17", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6367, "outputTokens": 4, "latencyMs": 1188 }, { "questionId": "q17", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5015, "outputTokens": 2, "latencyMs": 1452 }, { "questionId": "q17", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5762, "outputTokens": 4, "latencyMs": 1329 }, { "questionId": "q18", "format": "json", "model": "gpt-4o-mini", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 6391, "outputTokens": 6, "latencyMs": 768 }, { "questionId": "q18", "format": "json", "model": "claude-haiku-4-5", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 7871, "outputTokens": 10, "latencyMs": 1150 }, { "questionId": "q18", "format": "toon", "model": "gpt-4o-mini", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 2528, "outputTokens": 6, "latencyMs": 1501 }, { "questionId": "q18", "format": "toon", "model": "claude-haiku-4-5", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 2983, "outputTokens": 10, "latencyMs": 1201 }, { "questionId": "q18", "format": "csv", "model": "gpt-4o-mini", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 2382, "outputTokens": 6, "latencyMs": 1604 }, { "questionId": "q18", "format": "csv", "model": "claude-haiku-4-5", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 2857, "outputTokens": 10, "latencyMs": 1060 }, { "questionId": "q18", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 6317, "outputTokens": 6, "latencyMs": 1350 }, { "questionId": "q18", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 6366, "outputTokens": 10, "latencyMs": 1154 }, { "questionId": "q18", "format": "yaml", "model": "gpt-4o-mini", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 5013, "outputTokens": 6, "latencyMs": 1199 }, { "questionId": "q18", "format": "yaml", "model": "claude-haiku-4-5", "expected": "kelvin54@yahoo.com", "actual": "kelvin54@yahoo.com", "correct": true, "inputTokens": 5761, "outputTokens": 10, "latencyMs": 1216 }, { "questionId": "q19", "format": "json", "model": "gpt-4o-mini", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 6391, "outputTokens": 3, "latencyMs": 1412 }, { "questionId": "q19", "format": "json", "model": "claude-haiku-4-5", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 7872, "outputTokens": 6, "latencyMs": 1908 }, { "questionId": "q19", "format": "toon", "model": "gpt-4o-mini", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 2528, "outputTokens": 3, "latencyMs": 1366 }, { "questionId": "q19", "format": "toon", "model": "claude-haiku-4-5", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 2984, "outputTokens": 6, "latencyMs": 1054 }, { "questionId": "q19", "format": "csv", "model": "gpt-4o-mini", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 2382, "outputTokens": 3, "latencyMs": 1121 }, { "questionId": "q19", "format": "csv", "model": "claude-haiku-4-5", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 2858, "outputTokens": 6, "latencyMs": 1262 }, { "questionId": "q19", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 6317, "outputTokens": 3, "latencyMs": 4632 }, { "questionId": "q19", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 6367, "outputTokens": 6, "latencyMs": 1118 }, { "questionId": "q19", "format": "yaml", "model": "gpt-4o-mini", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 5013, "outputTokens": 3, "latencyMs": 928 }, { "questionId": "q19", "format": "yaml", "model": "claude-haiku-4-5", "expected": "143365", "actual": "143365", "correct": true, "inputTokens": 5762, "outputTokens": 6, "latencyMs": 1191 }, { "questionId": "q20", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6390, "outputTokens": 2, "latencyMs": 1053 }, { "questionId": "q20", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7868, "outputTokens": 4, "latencyMs": 1096 }, { "questionId": "q20", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2527, "outputTokens": 2, "latencyMs": 1784 }, { "questionId": "q20", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2980, "outputTokens": 4, "latencyMs": 1093 }, { "questionId": "q20", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2381, "outputTokens": 2, "latencyMs": 1335 }, { "questionId": "q20", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2854, "outputTokens": 4, "latencyMs": 1546 }, { "questionId": "q20", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6316, "outputTokens": 2, "latencyMs": 1293 }, { "questionId": "q20", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6363, "outputTokens": 4, "latencyMs": 1230 }, { "questionId": "q20", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5012, "outputTokens": 2, "latencyMs": 1467 }, { "questionId": "q20", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5758, "outputTokens": 4, "latencyMs": 1370 }, { "questionId": "q21", "format": "json", "model": "gpt-4o-mini", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 6394, "outputTokens": 6, "latencyMs": 5026 }, { "questionId": "q21", "format": "json", "model": "claude-haiku-4-5", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 7876, "outputTokens": 9, "latencyMs": 1786 }, { "questionId": "q21", "format": "toon", "model": "gpt-4o-mini", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 2531, "outputTokens": 6, "latencyMs": 826 }, { "questionId": "q21", "format": "toon", "model": "claude-haiku-4-5", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 2988, "outputTokens": 9, "latencyMs": 909 }, { "questionId": "q21", "format": "csv", "model": "gpt-4o-mini", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 2385, "outputTokens": 6, "latencyMs": 1120 }, { "questionId": "q21", "format": "csv", "model": "claude-haiku-4-5", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 2862, "outputTokens": 9, "latencyMs": 996 }, { "questionId": "q21", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 6320, "outputTokens": 6, "latencyMs": 1639 }, { "questionId": "q21", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 6371, "outputTokens": 9, "latencyMs": 1299 }, { "questionId": "q21", "format": "yaml", "model": "gpt-4o-mini", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 5016, "outputTokens": 6, "latencyMs": 1151 }, { "questionId": "q21", "format": "yaml", "model": "claude-haiku-4-5", "expected": "dean19@gmail.com", "actual": "dean19@gmail.com", "correct": true, "inputTokens": 5766, "outputTokens": 9, "latencyMs": 1246 }, { "questionId": "q22", "format": "json", "model": "gpt-4o-mini", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 6392, "outputTokens": 3, "latencyMs": 1838 }, { "questionId": "q22", "format": "json", "model": "claude-haiku-4-5", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 7871, "outputTokens": 6, "latencyMs": 1191 }, { "questionId": "q22", "format": "toon", "model": "gpt-4o-mini", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 2529, "outputTokens": 3, "latencyMs": 980 }, { "questionId": "q22", "format": "toon", "model": "claude-haiku-4-5", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 2983, "outputTokens": 6, "latencyMs": 1299 }, { "questionId": "q22", "format": "csv", "model": "gpt-4o-mini", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 2383, "outputTokens": 3, "latencyMs": 1027 }, { "questionId": "q22", "format": "csv", "model": "claude-haiku-4-5", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 2857, "outputTokens": 6, "latencyMs": 1433 }, { "questionId": "q22", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 6318, "outputTokens": 3, "latencyMs": 2256 }, { "questionId": "q22", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 6366, "outputTokens": 6, "latencyMs": 1091 }, { "questionId": "q22", "format": "yaml", "model": "gpt-4o-mini", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 5014, "outputTokens": 3, "latencyMs": 1288 }, { "questionId": "q22", "format": "yaml", "model": "claude-haiku-4-5", "expected": "111314", "actual": "111314", "correct": true, "inputTokens": 5761, "outputTokens": 6, "latencyMs": 1306 }, { "questionId": "q23", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6389, "outputTokens": 2, "latencyMs": 1951 }, { "questionId": "q23", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7868, "outputTokens": 4, "latencyMs": 1440 }, { "questionId": "q23", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2526, "outputTokens": 2, "latencyMs": 978 }, { "questionId": "q23", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2980, "outputTokens": 4, "latencyMs": 1385 }, { "questionId": "q23", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2380, "outputTokens": 2, "latencyMs": 2311 }, { "questionId": "q23", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2854, "outputTokens": 4, "latencyMs": 1066 }, { "questionId": "q23", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6315, "outputTokens": 2, "latencyMs": 1914 }, { "questionId": "q23", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6363, "outputTokens": 4, "latencyMs": 1596 }, { "questionId": "q23", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5011, "outputTokens": 2, "latencyMs": 1820 }, { "questionId": "q23", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5758, "outputTokens": 4, "latencyMs": 1067 }, { "questionId": "q24", "format": "json", "model": "gpt-4o-mini", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 6391, "outputTokens": 6, "latencyMs": 2594 }, { "questionId": "q24", "format": "json", "model": "claude-haiku-4-5", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 7869, "outputTokens": 10, "latencyMs": 1139 }, { "questionId": "q24", "format": "toon", "model": "gpt-4o-mini", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 2528, "outputTokens": 6, "latencyMs": 1225 }, { "questionId": "q24", "format": "toon", "model": "claude-haiku-4-5", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 2981, "outputTokens": 10, "latencyMs": 1082 }, { "questionId": "q24", "format": "csv", "model": "gpt-4o-mini", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 2382, "outputTokens": 6, "latencyMs": 4857 }, { "questionId": "q24", "format": "csv", "model": "claude-haiku-4-5", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 2855, "outputTokens": 10, "latencyMs": 1082 }, { "questionId": "q24", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 6317, "outputTokens": 6, "latencyMs": 1272 }, { "questionId": "q24", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 6364, "outputTokens": 10, "latencyMs": 1201 }, { "questionId": "q24", "format": "yaml", "model": "gpt-4o-mini", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 5013, "outputTokens": 6, "latencyMs": 1197 }, { "questionId": "q24", "format": "yaml", "model": "claude-haiku-4-5", "expected": "laurel54@yahoo.com", "actual": "laurel54@yahoo.com", "correct": true, "inputTokens": 5759, "outputTokens": 10, "latencyMs": 1198 }, { "questionId": "q25", "format": "json", "model": "gpt-4o-mini", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 6392, "outputTokens": 3, "latencyMs": 1085 }, { "questionId": "q25", "format": "json", "model": "claude-haiku-4-5", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 7873, "outputTokens": 6, "latencyMs": 1102 }, { "questionId": "q25", "format": "toon", "model": "gpt-4o-mini", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 2529, "outputTokens": 3, "latencyMs": 1350 }, { "questionId": "q25", "format": "toon", "model": "claude-haiku-4-5", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 2985, "outputTokens": 6, "latencyMs": 1300 }, { "questionId": "q25", "format": "csv", "model": "gpt-4o-mini", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 2383, "outputTokens": 3, "latencyMs": 998 }, { "questionId": "q25", "format": "csv", "model": "claude-haiku-4-5", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 2859, "outputTokens": 6, "latencyMs": 972 }, { "questionId": "q25", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 6318, "outputTokens": 3, "latencyMs": 1331 }, { "questionId": "q25", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 6368, "outputTokens": 6, "latencyMs": 1027 }, { "questionId": "q25", "format": "yaml", "model": "gpt-4o-mini", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 5014, "outputTokens": 3, "latencyMs": 1170 }, { "questionId": "q25", "format": "yaml", "model": "claude-haiku-4-5", "expected": "89553", "actual": "89553", "correct": true, "inputTokens": 5763, "outputTokens": 6, "latencyMs": 1074 }, { "questionId": "q26", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6389, "outputTokens": 2, "latencyMs": 1862 }, { "questionId": "q26", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7866, "outputTokens": 4, "latencyMs": 1435 }, { "questionId": "q26", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2526, "outputTokens": 2, "latencyMs": 989 }, { "questionId": "q26", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2978, "outputTokens": 4, "latencyMs": 1035 }, { "questionId": "q26", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2380, "outputTokens": 2, "latencyMs": 2157 }, { "questionId": "q26", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2852, "outputTokens": 4, "latencyMs": 1094 }, { "questionId": "q26", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6315, "outputTokens": 2, "latencyMs": 1912 }, { "questionId": "q26", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6361, "outputTokens": 4, "latencyMs": 1364 }, { "questionId": "q26", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5011, "outputTokens": 2, "latencyMs": 1435 }, { "questionId": "q26", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5756, "outputTokens": 4, "latencyMs": 1082 }, { "questionId": "q27", "format": "json", "model": "gpt-4o-mini", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 6392, "outputTokens": 9, "latencyMs": 1274 }, { "questionId": "q27", "format": "json", "model": "claude-haiku-4-5", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 7871, "outputTokens": 14, "latencyMs": 1130 }, { "questionId": "q27", "format": "toon", "model": "gpt-4o-mini", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 2529, "outputTokens": 9, "latencyMs": 1795 }, { "questionId": "q27", "format": "toon", "model": "claude-haiku-4-5", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 2983, "outputTokens": 14, "latencyMs": 1309 }, { "questionId": "q27", "format": "csv", "model": "gpt-4o-mini", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 2383, "outputTokens": 9, "latencyMs": 1406 }, { "questionId": "q27", "format": "csv", "model": "claude-haiku-4-5", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 2857, "outputTokens": 14, "latencyMs": 1398 }, { "questionId": "q27", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 6318, "outputTokens": 9, "latencyMs": 1114 }, { "questionId": "q27", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 6366, "outputTokens": 14, "latencyMs": 1251 }, { "questionId": "q27", "format": "yaml", "model": "gpt-4o-mini", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 5014, "outputTokens": 9, "latencyMs": 1941 }, { "questionId": "q27", "format": "yaml", "model": "claude-haiku-4-5", "expected": "jayme.kertzmann77@gmail.com", "actual": "jayme.kertzmann77@gmail.com", "correct": true, "inputTokens": 5761, "outputTokens": 14, "latencyMs": 1218 }, { "questionId": "q28", "format": "json", "model": "gpt-4o-mini", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 6391, "outputTokens": 3, "latencyMs": 1395 }, { "questionId": "q28", "format": "json", "model": "claude-haiku-4-5", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 7871, "outputTokens": 6, "latencyMs": 1342 }, { "questionId": "q28", "format": "toon", "model": "gpt-4o-mini", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 2528, "outputTokens": 3, "latencyMs": 919 }, { "questionId": "q28", "format": "toon", "model": "claude-haiku-4-5", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 2983, "outputTokens": 6, "latencyMs": 1187 }, { "questionId": "q28", "format": "csv", "model": "gpt-4o-mini", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 2382, "outputTokens": 3, "latencyMs": 1131 }, { "questionId": "q28", "format": "csv", "model": "claude-haiku-4-5", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 2857, "outputTokens": 6, "latencyMs": 1191 }, { "questionId": "q28", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 6317, "outputTokens": 3, "latencyMs": 1435 }, { "questionId": "q28", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 6366, "outputTokens": 6, "latencyMs": 1095 }, { "questionId": "q28", "format": "yaml", "model": "gpt-4o-mini", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 5013, "outputTokens": 3, "latencyMs": 4588 }, { "questionId": "q28", "format": "yaml", "model": "claude-haiku-4-5", "expected": "104053", "actual": "104053", "correct": true, "inputTokens": 5761, "outputTokens": 6, "latencyMs": 1291 }, { "questionId": "q29", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6392, "outputTokens": 2, "latencyMs": 1688 }, { "questionId": "q29", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7872, "outputTokens": 4, "latencyMs": 1301 }, { "questionId": "q29", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2529, "outputTokens": 2, "latencyMs": 1914 }, { "questionId": "q29", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2984, "outputTokens": 4, "latencyMs": 1447 }, { "questionId": "q29", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2383, "outputTokens": 2, "latencyMs": 1725 }, { "questionId": "q29", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2858, "outputTokens": 4, "latencyMs": 923 }, { "questionId": "q29", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6318, "outputTokens": 2, "latencyMs": 879 }, { "questionId": "q29", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6367, "outputTokens": 4, "latencyMs": 1322 }, { "questionId": "q29", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5014, "outputTokens": 2, "latencyMs": 1394 }, { "questionId": "q29", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5762, "outputTokens": 4, "latencyMs": 1008 }, { "questionId": "q30", "format": "json", "model": "gpt-4o-mini", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 6391, "outputTokens": 7, "latencyMs": 894 }, { "questionId": "q30", "format": "json", "model": "claude-haiku-4-5", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 7869, "outputTokens": 12, "latencyMs": 1220 }, { "questionId": "q30", "format": "toon", "model": "gpt-4o-mini", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 2528, "outputTokens": 7, "latencyMs": 2225 }, { "questionId": "q30", "format": "toon", "model": "claude-haiku-4-5", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 2981, "outputTokens": 12, "latencyMs": 1282 }, { "questionId": "q30", "format": "csv", "model": "gpt-4o-mini", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 2382, "outputTokens": 7, "latencyMs": 1414 }, { "questionId": "q30", "format": "csv", "model": "claude-haiku-4-5", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 2855, "outputTokens": 12, "latencyMs": 1686 }, { "questionId": "q30", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 6317, "outputTokens": 7, "latencyMs": 1113 }, { "questionId": "q30", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 6364, "outputTokens": 12, "latencyMs": 1089 }, { "questionId": "q30", "format": "yaml", "model": "gpt-4o-mini", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 5013, "outputTokens": 7, "latencyMs": 949 }, { "questionId": "q30", "format": "yaml", "model": "claude-haiku-4-5", "expected": "carley.bauch@yahoo.com", "actual": "carley.bauch@yahoo.com", "correct": true, "inputTokens": 5759, "outputTokens": 12, "latencyMs": 1273 }, { "questionId": "q31", "format": "json", "model": "gpt-4o-mini", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 6394, "outputTokens": 3, "latencyMs": 4741 }, { "questionId": "q31", "format": "json", "model": "claude-haiku-4-5", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 7874, "outputTokens": 6, "latencyMs": 1132 }, { "questionId": "q31", "format": "toon", "model": "gpt-4o-mini", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 2531, "outputTokens": 3, "latencyMs": 1184 }, { "questionId": "q31", "format": "toon", "model": "claude-haiku-4-5", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 2986, "outputTokens": 6, "latencyMs": 1137 }, { "questionId": "q31", "format": "csv", "model": "gpt-4o-mini", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 2385, "outputTokens": 3, "latencyMs": 963 }, { "questionId": "q31", "format": "csv", "model": "claude-haiku-4-5", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 2860, "outputTokens": 6, "latencyMs": 1096 }, { "questionId": "q31", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 6320, "outputTokens": 3, "latencyMs": 1399 }, { "questionId": "q31", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 6369, "outputTokens": 6, "latencyMs": 1594 }, { "questionId": "q31", "format": "yaml", "model": "gpt-4o-mini", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 5016, "outputTokens": 3, "latencyMs": 1900 }, { "questionId": "q31", "format": "yaml", "model": "claude-haiku-4-5", "expected": "142029", "actual": "142029", "correct": true, "inputTokens": 5764, "outputTokens": 6, "latencyMs": 1274 }, { "questionId": "q32", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Sales", "correct": false, "inputTokens": 6390, "outputTokens": 2, "latencyMs": 5224 }, { "questionId": "q32", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7869, "outputTokens": 4, "latencyMs": 1038 }, { "questionId": "q32", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2527, "outputTokens": 2, "latencyMs": 1902 }, { "questionId": "q32", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2981, "outputTokens": 4, "latencyMs": 1010 }, { "questionId": "q32", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2381, "outputTokens": 2, "latencyMs": 3263 }, { "questionId": "q32", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2855, "outputTokens": 4, "latencyMs": 871 }, { "questionId": "q32", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Sales", "correct": false, "inputTokens": 6316, "outputTokens": 2, "latencyMs": 1278 }, { "questionId": "q32", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6364, "outputTokens": 4, "latencyMs": 1048 }, { "questionId": "q32", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Sales", "correct": false, "inputTokens": 5012, "outputTokens": 2, "latencyMs": 1271 }, { "questionId": "q32", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5759, "outputTokens": 4, "latencyMs": 1075 }, { "questionId": "q33", "format": "json", "model": "gpt-4o-mini", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 6394, "outputTokens": 7, "latencyMs": 1139 }, { "questionId": "q33", "format": "json", "model": "claude-haiku-4-5", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 7872, "outputTokens": 14, "latencyMs": 1319 }, { "questionId": "q33", "format": "toon", "model": "gpt-4o-mini", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 2531, "outputTokens": 7, "latencyMs": 1856 }, { "questionId": "q33", "format": "toon", "model": "claude-haiku-4-5", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 2984, "outputTokens": 14, "latencyMs": 1393 }, { "questionId": "q33", "format": "csv", "model": "gpt-4o-mini", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 2385, "outputTokens": 7, "latencyMs": 1766 }, { "questionId": "q33", "format": "csv", "model": "claude-haiku-4-5", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 2858, "outputTokens": 14, "latencyMs": 1609 }, { "questionId": "q33", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 6320, "outputTokens": 7, "latencyMs": 1329 }, { "questionId": "q33", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 6367, "outputTokens": 14, "latencyMs": 1178 }, { "questionId": "q33", "format": "yaml", "model": "gpt-4o-mini", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 5016, "outputTokens": 7, "latencyMs": 1890 }, { "questionId": "q33", "format": "yaml", "model": "claude-haiku-4-5", "expected": "cheyenne_skiles@hotmail.com", "actual": "cheyenne_skiles@hotmail.com", "correct": true, "inputTokens": 5762, "outputTokens": 14, "latencyMs": 1326 }, { "questionId": "q34", "format": "json", "model": "gpt-4o-mini", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 6392, "outputTokens": 3, "latencyMs": 1898 }, { "questionId": "q34", "format": "json", "model": "claude-haiku-4-5", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 7871, "outputTokens": 6, "latencyMs": 1074 }, { "questionId": "q34", "format": "toon", "model": "gpt-4o-mini", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 2529, "outputTokens": 3, "latencyMs": 1382 }, { "questionId": "q34", "format": "toon", "model": "claude-haiku-4-5", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 2983, "outputTokens": 6, "latencyMs": 1060 }, { "questionId": "q34", "format": "csv", "model": "gpt-4o-mini", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 2383, "outputTokens": 3, "latencyMs": 1286 }, { "questionId": "q34", "format": "csv", "model": "claude-haiku-4-5", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 2857, "outputTokens": 6, "latencyMs": 1591 }, { "questionId": "q34", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 6318, "outputTokens": 3, "latencyMs": 2158 }, { "questionId": "q34", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 6366, "outputTokens": 6, "latencyMs": 1532 }, { "questionId": "q34", "format": "yaml", "model": "gpt-4o-mini", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 5014, "outputTokens": 3, "latencyMs": 1381 }, { "questionId": "q34", "format": "yaml", "model": "claude-haiku-4-5", "expected": "84650", "actual": "84650", "correct": true, "inputTokens": 5761, "outputTokens": 6, "latencyMs": 2262 }, { "questionId": "q35", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6391, "outputTokens": 2, "latencyMs": 2664 }, { "questionId": "q35", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7871, "outputTokens": 4, "latencyMs": 1260 }, { "questionId": "q35", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2528, "outputTokens": 2, "latencyMs": 1563 }, { "questionId": "q35", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2983, "outputTokens": 4, "latencyMs": 1415 }, { "questionId": "q35", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2382, "outputTokens": 2, "latencyMs": 1038 }, { "questionId": "q35", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2857, "outputTokens": 4, "latencyMs": 1021 }, { "questionId": "q35", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6317, "outputTokens": 2, "latencyMs": 4276 }, { "questionId": "q35", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6366, "outputTokens": 4, "latencyMs": 1301 }, { "questionId": "q35", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5013, "outputTokens": 2, "latencyMs": 1399 }, { "questionId": "q35", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5761, "outputTokens": 4, "latencyMs": 1197 }, { "questionId": "q36", "format": "json", "model": "gpt-4o-mini", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 6390, "outputTokens": 9, "latencyMs": 1390 }, { "questionId": "q36", "format": "json", "model": "claude-haiku-4-5", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 7869, "outputTokens": 14, "latencyMs": 1482 }, { "questionId": "q36", "format": "toon", "model": "gpt-4o-mini", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 2527, "outputTokens": 9, "latencyMs": 1754 }, { "questionId": "q36", "format": "toon", "model": "claude-haiku-4-5", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 2981, "outputTokens": 14, "latencyMs": 1100 }, { "questionId": "q36", "format": "csv", "model": "gpt-4o-mini", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 2381, "outputTokens": 9, "latencyMs": 1421 }, { "questionId": "q36", "format": "csv", "model": "claude-haiku-4-5", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 2855, "outputTokens": 14, "latencyMs": 2173 }, { "questionId": "q36", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 6316, "outputTokens": 9, "latencyMs": 2911 }, { "questionId": "q36", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 6364, "outputTokens": 14, "latencyMs": 1235 }, { "questionId": "q36", "format": "yaml", "model": "gpt-4o-mini", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 5012, "outputTokens": 9, "latencyMs": 1303 }, { "questionId": "q36", "format": "yaml", "model": "claude-haiku-4-5", "expected": "macey.gottlieb5@yahoo.com", "actual": "macey.gottlieb5@yahoo.com", "correct": true, "inputTokens": 5759, "outputTokens": 14, "latencyMs": 1148 }, { "questionId": "q37", "format": "json", "model": "gpt-4o-mini", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 6390, "outputTokens": 3, "latencyMs": 1430 }, { "questionId": "q37", "format": "json", "model": "claude-haiku-4-5", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 7868, "outputTokens": 6, "latencyMs": 1089 }, { "questionId": "q37", "format": "toon", "model": "gpt-4o-mini", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 2527, "outputTokens": 3, "latencyMs": 1059 }, { "questionId": "q37", "format": "toon", "model": "claude-haiku-4-5", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 2980, "outputTokens": 6, "latencyMs": 1057 }, { "questionId": "q37", "format": "csv", "model": "gpt-4o-mini", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 2381, "outputTokens": 3, "latencyMs": 1716 }, { "questionId": "q37", "format": "csv", "model": "claude-haiku-4-5", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 2854, "outputTokens": 6, "latencyMs": 904 }, { "questionId": "q37", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 6316, "outputTokens": 3, "latencyMs": 2950 }, { "questionId": "q37", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 6363, "outputTokens": 6, "latencyMs": 1189 }, { "questionId": "q37", "format": "yaml", "model": "gpt-4o-mini", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 5012, "outputTokens": 3, "latencyMs": 1050 }, { "questionId": "q37", "format": "yaml", "model": "claude-haiku-4-5", "expected": "89773", "actual": "89773", "correct": true, "inputTokens": 5758, "outputTokens": 6, "latencyMs": 1329 }, { "questionId": "q38", "format": "json", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6390, "outputTokens": 2, "latencyMs": 3410 }, { "questionId": "q38", "format": "json", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 7868, "outputTokens": 4, "latencyMs": 1891 }, { "questionId": "q38", "format": "toon", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2527, "outputTokens": 2, "latencyMs": 1010 }, { "questionId": "q38", "format": "toon", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2980, "outputTokens": 4, "latencyMs": 988 }, { "questionId": "q38", "format": "csv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2381, "outputTokens": 2, "latencyMs": 1364 }, { "questionId": "q38", "format": "csv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 2854, "outputTokens": 4, "latencyMs": 1395 }, { "questionId": "q38", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6316, "outputTokens": 2, "latencyMs": 2293 }, { "questionId": "q38", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 6363, "outputTokens": 4, "latencyMs": 1137 }, { "questionId": "q38", "format": "yaml", "model": "gpt-4o-mini", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5012, "outputTokens": 2, "latencyMs": 1451 }, { "questionId": "q38", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Marketing", "actual": "Marketing", "correct": true, "inputTokens": 5758, "outputTokens": 4, "latencyMs": 1100 }, { "questionId": "q39", "format": "json", "model": "gpt-4o-mini", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 6390, "outputTokens": 10, "latencyMs": 1674 }, { "questionId": "q39", "format": "json", "model": "claude-haiku-4-5", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 7869, "outputTokens": 13, "latencyMs": 1403 }, { "questionId": "q39", "format": "toon", "model": "gpt-4o-mini", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 2527, "outputTokens": 10, "latencyMs": 1413 }, { "questionId": "q39", "format": "toon", "model": "claude-haiku-4-5", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 2981, "outputTokens": 13, "latencyMs": 1200 }, { "questionId": "q39", "format": "csv", "model": "gpt-4o-mini", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 2381, "outputTokens": 10, "latencyMs": 1730 }, { "questionId": "q39", "format": "csv", "model": "claude-haiku-4-5", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 2855, "outputTokens": 13, "latencyMs": 1226 }, { "questionId": "q39", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 6316, "outputTokens": 10, "latencyMs": 1251 }, { "questionId": "q39", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 6364, "outputTokens": 13, "latencyMs": 1337 }, { "questionId": "q39", "format": "yaml", "model": "gpt-4o-mini", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 5012, "outputTokens": 10, "latencyMs": 2368 }, { "questionId": "q39", "format": "yaml", "model": "claude-haiku-4-5", "expected": "georgianna_renner@yahoo.com", "actual": "georgianna_renner@yahoo.com", "correct": true, "inputTokens": 5759, "outputTokens": 13, "latencyMs": 1251 }, { "questionId": "q40", "format": "json", "model": "gpt-4o-mini", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 6391, "outputTokens": 3, "latencyMs": 3815 }, { "questionId": "q40", "format": "json", "model": "claude-haiku-4-5", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 7871, "outputTokens": 6, "latencyMs": 1169 }, { "questionId": "q40", "format": "toon", "model": "gpt-4o-mini", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 2528, "outputTokens": 3, "latencyMs": 1070 }, { "questionId": "q40", "format": "toon", "model": "claude-haiku-4-5", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 2983, "outputTokens": 6, "latencyMs": 1162 }, { "questionId": "q40", "format": "csv", "model": "gpt-4o-mini", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 2382, "outputTokens": 3, "latencyMs": 1115 }, { "questionId": "q40", "format": "csv", "model": "claude-haiku-4-5", "expected": "49741", "actual": "144426", "correct": false, "inputTokens": 2857, "outputTokens": 6, "latencyMs": 1365 }, { "questionId": "q40", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 6317, "outputTokens": 3, "latencyMs": 2004 }, { "questionId": "q40", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 6366, "outputTokens": 6, "latencyMs": 1113 }, { "questionId": "q40", "format": "yaml", "model": "gpt-4o-mini", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 5013, "outputTokens": 3, "latencyMs": 3055 }, { "questionId": "q40", "format": "yaml", "model": "claude-haiku-4-5", "expected": "49741", "actual": "49741", "correct": true, "inputTokens": 5761, "outputTokens": 6, "latencyMs": 1392 }, { "questionId": "q41", "format": "json", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 6388, "outputTokens": 2, "latencyMs": 3877 }, { "questionId": "q41", "format": "json", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 7865, "outputTokens": 5, "latencyMs": 1128 }, { "questionId": "q41", "format": "toon", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 2525, "outputTokens": 2, "latencyMs": 966 }, { "questionId": "q41", "format": "toon", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 2977, "outputTokens": 5, "latencyMs": 1070 }, { "questionId": "q41", "format": "csv", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 2379, "outputTokens": 2, "latencyMs": 2411 }, { "questionId": "q41", "format": "csv", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 2851, "outputTokens": 5, "latencyMs": 1286 }, { "questionId": "q41", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 6314, "outputTokens": 2, "latencyMs": 2082 }, { "questionId": "q41", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 6360, "outputTokens": 5, "latencyMs": 1107 }, { "questionId": "q41", "format": "yaml", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 5010, "outputTokens": 2, "latencyMs": 1216 }, { "questionId": "q41", "format": "yaml", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 5755, "outputTokens": 5, "latencyMs": 1052 }, { "questionId": "q42", "format": "json", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 6388, "outputTokens": 2, "latencyMs": 1572 }, { "questionId": "q42", "format": "json", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 7865, "outputTokens": 5, "latencyMs": 1084 }, { "questionId": "q42", "format": "toon", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 2525, "outputTokens": 2, "latencyMs": 1377 }, { "questionId": "q42", "format": "toon", "model": "claude-haiku-4-5", "expected": "17", "actual": "14", "correct": false, "inputTokens": 2977, "outputTokens": 5, "latencyMs": 1197 }, { "questionId": "q42", "format": "csv", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 2379, "outputTokens": 2, "latencyMs": 2705 }, { "questionId": "q42", "format": "csv", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 2851, "outputTokens": 5, "latencyMs": 1020 }, { "questionId": "q42", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 6314, "outputTokens": 2, "latencyMs": 5345 }, { "questionId": "q42", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "17", "actual": "14", "correct": false, "inputTokens": 6360, "outputTokens": 5, "latencyMs": 1207 }, { "questionId": "q42", "format": "yaml", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 5010, "outputTokens": 2, "latencyMs": 921 }, { "questionId": "q42", "format": "yaml", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 5755, "outputTokens": 5, "latencyMs": 1289 }, { "questionId": "q43", "format": "json", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 6388, "outputTokens": 2, "latencyMs": 2423 }, { "questionId": "q43", "format": "json", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 7865, "outputTokens": 5, "latencyMs": 1273 }, { "questionId": "q43", "format": "toon", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 2525, "outputTokens": 2, "latencyMs": 975 }, { "questionId": "q43", "format": "toon", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 2977, "outputTokens": 5, "latencyMs": 1301 }, { "questionId": "q43", "format": "csv", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 2379, "outputTokens": 2, "latencyMs": 1423 }, { "questionId": "q43", "format": "csv", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 2851, "outputTokens": 5, "latencyMs": 927 }, { "questionId": "q43", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 6314, "outputTokens": 2, "latencyMs": 1258 }, { "questionId": "q43", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 6360, "outputTokens": 5, "latencyMs": 1250 }, { "questionId": "q43", "format": "yaml", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 5010, "outputTokens": 2, "latencyMs": 872 }, { "questionId": "q43", "format": "yaml", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 5755, "outputTokens": 5, "latencyMs": 1385 }, { "questionId": "q44", "format": "json", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 6388, "outputTokens": 2, "latencyMs": 1201 }, { "questionId": "q44", "format": "json", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 7865, "outputTokens": 5, "latencyMs": 1149 }, { "questionId": "q44", "format": "toon", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 2525, "outputTokens": 2, "latencyMs": 1498 }, { "questionId": "q44", "format": "toon", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 2977, "outputTokens": 5, "latencyMs": 1149 }, { "questionId": "q44", "format": "csv", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 2379, "outputTokens": 2, "latencyMs": 1098 }, { "questionId": "q44", "format": "csv", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 2851, "outputTokens": 5, "latencyMs": 1121 }, { "questionId": "q44", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 6314, "outputTokens": 2, "latencyMs": 2522 }, { "questionId": "q44", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "17", "actual": "10", "correct": false, "inputTokens": 6360, "outputTokens": 5, "latencyMs": 1532 }, { "questionId": "q44", "format": "yaml", "model": "gpt-4o-mini", "expected": "17", "actual": "20", "correct": false, "inputTokens": 5010, "outputTokens": 2, "latencyMs": 4914 }, { "questionId": "q44", "format": "yaml", "model": "claude-haiku-4-5", "expected": "17", "actual": "15", "correct": false, "inputTokens": 5755, "outputTokens": 5, "latencyMs": 1324 }, { "questionId": "q45", "format": "json", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 6388, "outputTokens": 2, "latencyMs": 1446 }, { "questionId": "q45", "format": "json", "model": "claude-haiku-4-5", "expected": "16", "actual": "12", "correct": false, "inputTokens": 7865, "outputTokens": 5, "latencyMs": 1105 }, { "questionId": "q45", "format": "toon", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 2525, "outputTokens": 2, "latencyMs": 1297 }, { "questionId": "q45", "format": "toon", "model": "claude-haiku-4-5", "expected": "16", "actual": "15", "correct": false, "inputTokens": 2977, "outputTokens": 5, "latencyMs": 1251 }, { "questionId": "q45", "format": "csv", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 2379, "outputTokens": 2, "latencyMs": 1561 }, { "questionId": "q45", "format": "csv", "model": "claude-haiku-4-5", "expected": "16", "actual": "15", "correct": false, "inputTokens": 2851, "outputTokens": 5, "latencyMs": 1292 }, { "questionId": "q45", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 6314, "outputTokens": 2, "latencyMs": 1127 }, { "questionId": "q45", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "16", "actual": "12", "correct": false, "inputTokens": 6360, "outputTokens": 5, "latencyMs": 1207 }, { "questionId": "q45", "format": "yaml", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 5010, "outputTokens": 2, "latencyMs": 1582 }, { "questionId": "q45", "format": "yaml", "model": "claude-haiku-4-5", "expected": "16", "actual": "15", "correct": false, "inputTokens": 5755, "outputTokens": 5, "latencyMs": 1278 }, { "questionId": "q46", "format": "json", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 6388, "outputTokens": 2, "latencyMs": 1278 }, { "questionId": "q46", "format": "json", "model": "claude-haiku-4-5", "expected": "16", "actual": "10", "correct": false, "inputTokens": 7865, "outputTokens": 5, "latencyMs": 3084 }, { "questionId": "q46", "format": "toon", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 2525, "outputTokens": 2, "latencyMs": 1289 }, { "questionId": "q46", "format": "toon", "model": "claude-haiku-4-5", "expected": "16", "actual": "15", "correct": false, "inputTokens": 2977, "outputTokens": 5, "latencyMs": 1591 }, { "questionId": "q46", "format": "csv", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 2379, "outputTokens": 2, "latencyMs": 3038 }, { "questionId": "q46", "format": "csv", "model": "claude-haiku-4-5", "expected": "16", "actual": "15", "correct": false, "inputTokens": 2851, "outputTokens": 5, "latencyMs": 1447 }, { "questionId": "q46", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 6314, "outputTokens": 2, "latencyMs": 1224 }, { "questionId": "q46", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "16", "actual": "10", "correct": false, "inputTokens": 6360, "outputTokens": 5, "latencyMs": 1250 }, { "questionId": "q46", "format": "yaml", "model": "gpt-4o-mini", "expected": "16", "actual": "20", "correct": false, "inputTokens": 5010, "outputTokens": 2, "latencyMs": 1364 }, { "questionId": "q46", "format": "yaml", "model": "claude-haiku-4-5", "expected": "16", "actual": "12", "correct": false, "inputTokens": 5755, "outputTokens": 5, "latencyMs": 1560 }, { "questionId": "q47", "format": "json", "model": "gpt-4o-mini", "expected": "91", "actual": "66", "correct": false, "inputTokens": 6393, "outputTokens": 2, "latencyMs": 989 }, { "questionId": "q47", "format": "json", "model": "claude-haiku-4-5", "expected": "91", "actual": "89", "correct": false, "inputTokens": 7870, "outputTokens": 5, "latencyMs": 1358 }, { "questionId": "q47", "format": "toon", "model": "gpt-4o-mini", "expected": "91", "actual": "66", "correct": false, "inputTokens": 2530, "outputTokens": 2, "latencyMs": 1406 }, { "questionId": "q47", "format": "toon", "model": "claude-haiku-4-5", "expected": "91", "actual": "85", "correct": false, "inputTokens": 2982, "outputTokens": 5, "latencyMs": 1123 }, { "questionId": "q47", "format": "csv", "model": "gpt-4o-mini", "expected": "91", "actual": "66", "correct": false, "inputTokens": 2384, "outputTokens": 2, "latencyMs": 4883 }, { "questionId": "q47", "format": "csv", "model": "claude-haiku-4-5", "expected": "91", "actual": "85", "correct": false, "inputTokens": 2856, "outputTokens": 5, "latencyMs": 1402 }, { "questionId": "q47", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "91", "actual": "66", "correct": false, "inputTokens": 6319, "outputTokens": 2, "latencyMs": 1915 }, { "questionId": "q47", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "91", "actual": "89", "correct": false, "inputTokens": 6365, "outputTokens": 5, "latencyMs": 1263 }, { "questionId": "q47", "format": "yaml", "model": "gpt-4o-mini", "expected": "91", "actual": "66", "correct": false, "inputTokens": 5015, "outputTokens": 2, "latencyMs": 1448 }, { "questionId": "q47", "format": "yaml", "model": "claude-haiku-4-5", "expected": "91", "actual": "89", "correct": false, "inputTokens": 5760, "outputTokens": 5, "latencyMs": 1243 }, { "questionId": "q48", "format": "json", "model": "gpt-4o-mini", "expected": "67", "actual": "54", "correct": false, "inputTokens": 6393, "outputTokens": 2, "latencyMs": 1456 }, { "questionId": "q48", "format": "json", "model": "claude-haiku-4-5", "expected": "67", "actual": "57", "correct": false, "inputTokens": 7870, "outputTokens": 5, "latencyMs": 1186 }, { "questionId": "q48", "format": "toon", "model": "gpt-4o-mini", "expected": "67", "actual": "54", "correct": false, "inputTokens": 2530, "outputTokens": 2, "latencyMs": 1076 }, { "questionId": "q48", "format": "toon", "model": "claude-haiku-4-5", "expected": "67", "actual": "47", "correct": false, "inputTokens": 2982, "outputTokens": 5, "latencyMs": 1168 }, { "questionId": "q48", "format": "csv", "model": "gpt-4o-mini", "expected": "67", "actual": "56", "correct": false, "inputTokens": 2384, "outputTokens": 2, "latencyMs": 3105 }, { "questionId": "q48", "format": "csv", "model": "claude-haiku-4-5", "expected": "67", "actual": "47", "correct": false, "inputTokens": 2856, "outputTokens": 5, "latencyMs": 1375 }, { "questionId": "q48", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "67", "actual": "66", "correct": false, "inputTokens": 6319, "outputTokens": 2, "latencyMs": 1618 }, { "questionId": "q48", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "67", "actual": "47", "correct": false, "inputTokens": 6365, "outputTokens": 5, "latencyMs": 1454 }, { "questionId": "q48", "format": "yaml", "model": "gpt-4o-mini", "expected": "67", "actual": "54", "correct": false, "inputTokens": 5015, "outputTokens": 2, "latencyMs": 1244 }, { "questionId": "q48", "format": "yaml", "model": "claude-haiku-4-5", "expected": "67", "actual": "57", "correct": false, "inputTokens": 5760, "outputTokens": 5, "latencyMs": 1113 }, { "questionId": "q49", "format": "json", "model": "gpt-4o-mini", "expected": "41", "actual": "30", "correct": false, "inputTokens": 6393, "outputTokens": 2, "latencyMs": 1267 }, { "questionId": "q49", "format": "json", "model": "claude-haiku-4-5", "expected": "41", "actual": "31", "correct": false, "inputTokens": 7870, "outputTokens": 5, "latencyMs": 1227 }, { "questionId": "q49", "format": "toon", "model": "gpt-4o-mini", "expected": "41", "actual": "30", "correct": false, "inputTokens": 2530, "outputTokens": 2, "latencyMs": 1246 }, { "questionId": "q49", "format": "toon", "model": "claude-haiku-4-5", "expected": "41", "actual": "27", "correct": false, "inputTokens": 2982, "outputTokens": 5, "latencyMs": 1127 }, { "questionId": "q49", "format": "csv", "model": "gpt-4o-mini", "expected": "41", "actual": "34", "correct": false, "inputTokens": 2384, "outputTokens": 2, "latencyMs": 1260 }, { "questionId": "q49", "format": "csv", "model": "claude-haiku-4-5", "expected": "41", "actual": "31", "correct": false, "inputTokens": 2856, "outputTokens": 5, "latencyMs": 1293 }, { "questionId": "q49", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "41", "actual": "24", "correct": false, "inputTokens": 6319, "outputTokens": 2, "latencyMs": 1246 }, { "questionId": "q49", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "41", "actual": "27", "correct": false, "inputTokens": 6365, "outputTokens": 5, "latencyMs": 1598 }, { "questionId": "q49", "format": "yaml", "model": "gpt-4o-mini", "expected": "41", "actual": "24", "correct": false, "inputTokens": 5015, "outputTokens": 2, "latencyMs": 1471 }, { "questionId": "q49", "format": "yaml", "model": "claude-haiku-4-5", "expected": "41", "actual": "31", "correct": false, "inputTokens": 5760, "outputTokens": 5, "latencyMs": 1311 }, { "questionId": "q50", "format": "json", "model": "gpt-4o-mini", "expected": "26", "actual": "22", "correct": false, "inputTokens": 6393, "outputTokens": 2, "latencyMs": 3950 }, { "questionId": "q50", "format": "json", "model": "claude-haiku-4-5", "expected": "26", "actual": "20", "correct": false, "inputTokens": 7870, "outputTokens": 5, "latencyMs": 1075 }, { "questionId": "q50", "format": "toon", "model": "gpt-4o-mini", "expected": "26", "actual": "22", "correct": false, "inputTokens": 2530, "outputTokens": 2, "latencyMs": 1868 }, { "questionId": "q50", "format": "toon", "model": "claude-haiku-4-5", "expected": "26", "actual": "16", "correct": false, "inputTokens": 2982, "outputTokens": 5, "latencyMs": 1075 }, { "questionId": "q50", "format": "csv", "model": "gpt-4o-mini", "expected": "26", "actual": "24", "correct": false, "inputTokens": 2384, "outputTokens": 2, "latencyMs": 1973 }, { "questionId": "q50", "format": "csv", "model": "claude-haiku-4-5", "expected": "26", "actual": "16", "correct": false, "inputTokens": 2856, "outputTokens": 5, "latencyMs": 947 }, { "questionId": "q50", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "26", "actual": "22", "correct": false, "inputTokens": 6319, "outputTokens": 2, "latencyMs": 1414 }, { "questionId": "q50", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "26", "actual": "16", "correct": false, "inputTokens": 6365, "outputTokens": 5, "latencyMs": 1221 }, { "questionId": "q50", "format": "yaml", "model": "gpt-4o-mini", "expected": "26", "actual": "18", "correct": false, "inputTokens": 5015, "outputTokens": 2, "latencyMs": 1148 }, { "questionId": "q50", "format": "yaml", "model": "claude-haiku-4-5", "expected": "26", "actual": "20", "correct": false, "inputTokens": 5760, "outputTokens": 5, "latencyMs": 1286 }, { "questionId": "q51", "format": "json", "model": "gpt-4o-mini", "expected": "78", "actual": "66", "correct": false, "inputTokens": 6387, "outputTokens": 2, "latencyMs": 2525 }, { "questionId": "q51", "format": "json", "model": "claude-haiku-4-5", "expected": "78", "actual": "81", "correct": false, "inputTokens": 7864, "outputTokens": 5, "latencyMs": 1613 }, { "questionId": "q51", "format": "toon", "model": "gpt-4o-mini", "expected": "78", "actual": "66", "correct": false, "inputTokens": 2524, "outputTokens": 2, "latencyMs": 1132 }, { "questionId": "q51", "format": "toon", "model": "claude-haiku-4-5", "expected": "78", "actual": "78", "correct": true, "inputTokens": 2976, "outputTokens": 5, "latencyMs": 1104 }, { "questionId": "q51", "format": "csv", "model": "gpt-4o-mini", "expected": "78", "actual": "77", "correct": false, "inputTokens": 2378, "outputTokens": 2, "latencyMs": 1069 }, { "questionId": "q51", "format": "csv", "model": "claude-haiku-4-5", "expected": "78", "actual": "73", "correct": false, "inputTokens": 2850, "outputTokens": 5, "latencyMs": 1113 }, { "questionId": "q51", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "78", "actual": "66", "correct": false, "inputTokens": 6313, "outputTokens": 2, "latencyMs": 1999 }, { "questionId": "q51", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "78", "actual": "78", "correct": true, "inputTokens": 6359, "outputTokens": 5, "latencyMs": 1214 }, { "questionId": "q51", "format": "yaml", "model": "gpt-4o-mini", "expected": "78", "actual": "66", "correct": false, "inputTokens": 5009, "outputTokens": 2, "latencyMs": 1613 }, { "questionId": "q51", "format": "yaml", "model": "claude-haiku-4-5", "expected": "78", "actual": "77", "correct": false, "inputTokens": 5754, "outputTokens": 5, "latencyMs": 1012 }, { "questionId": "q52", "format": "json", "model": "gpt-4o-mini", "expected": "22", "actual": "30", "correct": false, "inputTokens": 6387, "outputTokens": 2, "latencyMs": 1580 }, { "questionId": "q52", "format": "json", "model": "claude-haiku-4-5", "expected": "22", "actual": "15", "correct": false, "inputTokens": 7864, "outputTokens": 5, "latencyMs": 1688 }, { "questionId": "q52", "format": "toon", "model": "gpt-4o-mini", "expected": "22", "actual": "22", "correct": true, "inputTokens": 2524, "outputTokens": 2, "latencyMs": 1290 }, { "questionId": "q52", "format": "toon", "model": "claude-haiku-4-5", "expected": "22", "actual": "16", "correct": false, "inputTokens": 2976, "outputTokens": 5, "latencyMs": 1121 }, { "questionId": "q52", "format": "csv", "model": "gpt-4o-mini", "expected": "22", "actual": "10", "correct": false, "inputTokens": 2378, "outputTokens": 2, "latencyMs": 1544 }, { "questionId": "q52", "format": "csv", "model": "claude-haiku-4-5", "expected": "22", "actual": "20", "correct": false, "inputTokens": 2850, "outputTokens": 5, "latencyMs": 822 }, { "questionId": "q52", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "22", "actual": "34", "correct": false, "inputTokens": 6313, "outputTokens": 2, "latencyMs": 2718 }, { "questionId": "q52", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "22", "actual": "15", "correct": false, "inputTokens": 6359, "outputTokens": 5, "latencyMs": 1211 }, { "questionId": "q52", "format": "yaml", "model": "gpt-4o-mini", "expected": "22", "actual": "34", "correct": false, "inputTokens": 5009, "outputTokens": 2, "latencyMs": 1162 }, { "questionId": "q52", "format": "yaml", "model": "claude-haiku-4-5", "expected": "22", "actual": "16", "correct": false, "inputTokens": 5754, "outputTokens": 5, "latencyMs": 1156 }, { "questionId": "q53", "format": "json", "model": "gpt-4o-mini", "expected": "12", "actual": "24", "correct": false, "inputTokens": 6395, "outputTokens": 2, "latencyMs": 1089 }, { "questionId": "q53", "format": "json", "model": "claude-haiku-4-5", "expected": "12", "actual": "9", "correct": false, "inputTokens": 7872, "outputTokens": 5, "latencyMs": 1368 }, { "questionId": "q53", "format": "toon", "model": "gpt-4o-mini", "expected": "12", "actual": "24", "correct": false, "inputTokens": 2532, "outputTokens": 2, "latencyMs": 1850 }, { "questionId": "q53", "format": "toon", "model": "claude-haiku-4-5", "expected": "12", "actual": "9", "correct": false, "inputTokens": 2984, "outputTokens": 5, "latencyMs": 914 }, { "questionId": "q53", "format": "csv", "model": "gpt-4o-mini", "expected": "12", "actual": "34", "correct": false, "inputTokens": 2386, "outputTokens": 2, "latencyMs": 1156 }, { "questionId": "q53", "format": "csv", "model": "claude-haiku-4-5", "expected": "12", "actual": "10", "correct": false, "inputTokens": 2858, "outputTokens": 5, "latencyMs": 1118 }, { "questionId": "q53", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "12", "actual": "22", "correct": false, "inputTokens": 6321, "outputTokens": 2, "latencyMs": 1020 }, { "questionId": "q53", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "12", "actual": "8", "correct": false, "inputTokens": 6367, "outputTokens": 5, "latencyMs": 1021 }, { "questionId": "q53", "format": "yaml", "model": "gpt-4o-mini", "expected": "12", "actual": "18", "correct": false, "inputTokens": 5017, "outputTokens": 2, "latencyMs": 1236 }, { "questionId": "q53", "format": "yaml", "model": "claude-haiku-4-5", "expected": "12", "actual": "10", "correct": false, "inputTokens": 5762, "outputTokens": 5, "latencyMs": 1574 }, { "questionId": "q54", "format": "json", "model": "gpt-4o-mini", "expected": "11", "actual": "24", "correct": false, "inputTokens": 6395, "outputTokens": 2, "latencyMs": 1437 }, { "questionId": "q54", "format": "json", "model": "claude-haiku-4-5", "expected": "11", "actual": "7", "correct": false, "inputTokens": 7872, "outputTokens": 5, "latencyMs": 1091 }, { "questionId": "q54", "format": "toon", "model": "gpt-4o-mini", "expected": "11", "actual": "24", "correct": false, "inputTokens": 2532, "outputTokens": 2, "latencyMs": 1917 }, { "questionId": "q54", "format": "toon", "model": "claude-haiku-4-5", "expected": "11", "actual": "6", "correct": false, "inputTokens": 2984, "outputTokens": 5, "latencyMs": 1095 }, { "questionId": "q54", "format": "csv", "model": "gpt-4o-mini", "expected": "11", "actual": "34", "correct": false, "inputTokens": 2386, "outputTokens": 2, "latencyMs": 4230 }, { "questionId": "q54", "format": "csv", "model": "claude-haiku-4-5", "expected": "11", "actual": "8", "correct": false, "inputTokens": 2858, "outputTokens": 5, "latencyMs": 1187 }, { "questionId": "q54", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "11", "actual": "24", "correct": false, "inputTokens": 6321, "outputTokens": 2, "latencyMs": 1197 }, { "questionId": "q54", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "11", "actual": "6", "correct": false, "inputTokens": 6367, "outputTokens": 5, "latencyMs": 1176 }, { "questionId": "q54", "format": "yaml", "model": "gpt-4o-mini", "expected": "11", "actual": "18", "correct": false, "inputTokens": 5017, "outputTokens": 2, "latencyMs": 1249 }, { "questionId": "q54", "format": "yaml", "model": "claude-haiku-4-5", "expected": "11", "actual": "8", "correct": false, "inputTokens": 5762, "outputTokens": 5, "latencyMs": 1383 }, { "questionId": "q55", "format": "json", "model": "gpt-4o-mini", "expected": "11", "actual": "30", "correct": false, "inputTokens": 6395, "outputTokens": 2, "latencyMs": 1149 }, { "questionId": "q55", "format": "json", "model": "claude-haiku-4-5", "expected": "11", "actual": "8", "correct": false, "inputTokens": 7872, "outputTokens": 5, "latencyMs": 1072 }, { "questionId": "q55", "format": "toon", "model": "gpt-4o-mini", "expected": "11", "actual": "18", "correct": false, "inputTokens": 2532, "outputTokens": 2, "latencyMs": 1213 }, { "questionId": "q55", "format": "toon", "model": "claude-haiku-4-5", "expected": "11", "actual": "7", "correct": false, "inputTokens": 2984, "outputTokens": 5, "latencyMs": 1507 }, { "questionId": "q55", "format": "csv", "model": "gpt-4o-mini", "expected": "11", "actual": "34", "correct": false, "inputTokens": 2386, "outputTokens": 2, "latencyMs": 1826 }, { "questionId": "q55", "format": "csv", "model": "claude-haiku-4-5", "expected": "11", "actual": "8", "correct": false, "inputTokens": 2858, "outputTokens": 5, "latencyMs": 1162 }, { "questionId": "q55", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "11", "actual": "24", "correct": false, "inputTokens": 6321, "outputTokens": 2, "latencyMs": 1008 }, { "questionId": "q55", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "11", "actual": "7", "correct": false, "inputTokens": 6367, "outputTokens": 5, "latencyMs": 1285 }, { "questionId": "q55", "format": "yaml", "model": "gpt-4o-mini", "expected": "11", "actual": "22", "correct": false, "inputTokens": 5017, "outputTokens": 2, "latencyMs": 1124 }, { "questionId": "q55", "format": "yaml", "model": "claude-haiku-4-5", "expected": "11", "actual": "9", "correct": false, "inputTokens": 5762, "outputTokens": 5, "latencyMs": 1212 }, { "questionId": "q56", "format": "json", "model": "gpt-4o-mini", "expected": "12", "actual": "22", "correct": false, "inputTokens": 6395, "outputTokens": 2, "latencyMs": 1232 }, { "questionId": "q56", "format": "json", "model": "claude-haiku-4-5", "expected": "12", "actual": "7", "correct": false, "inputTokens": 7872, "outputTokens": 5, "latencyMs": 1792 }, { "questionId": "q56", "format": "toon", "model": "gpt-4o-mini", "expected": "12", "actual": "12", "correct": true, "inputTokens": 2532, "outputTokens": 2, "latencyMs": 1357 }, { "questionId": "q56", "format": "toon", "model": "claude-haiku-4-5", "expected": "12", "actual": "6", "correct": false, "inputTokens": 2984, "outputTokens": 5, "latencyMs": 1247 }, { "questionId": "q56", "format": "csv", "model": "gpt-4o-mini", "expected": "12", "actual": "22", "correct": false, "inputTokens": 2386, "outputTokens": 2, "latencyMs": 1043 }, { "questionId": "q56", "format": "csv", "model": "claude-haiku-4-5", "expected": "12", "actual": "7", "correct": false, "inputTokens": 2858, "outputTokens": 5, "latencyMs": 1065 }, { "questionId": "q56", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "12", "actual": "10", "correct": false, "inputTokens": 6321, "outputTokens": 2, "latencyMs": 1298 }, { "questionId": "q56", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "12", "actual": "7", "correct": false, "inputTokens": 6367, "outputTokens": 5, "latencyMs": 1767 }, { "questionId": "q56", "format": "yaml", "model": "gpt-4o-mini", "expected": "12", "actual": "10", "correct": false, "inputTokens": 5017, "outputTokens": 2, "latencyMs": 3525 }, { "questionId": "q56", "format": "yaml", "model": "claude-haiku-4-5", "expected": "12", "actual": "8", "correct": false, "inputTokens": 5762, "outputTokens": 5, "latencyMs": 1355 }, { "questionId": "q57", "format": "json", "model": "gpt-4o-mini", "expected": "62", "actual": "54", "correct": false, "inputTokens": 6394, "outputTokens": 2, "latencyMs": 1359 }, { "questionId": "q57", "format": "json", "model": "claude-haiku-4-5", "expected": "62", "actual": "62", "correct": true, "inputTokens": 7872, "outputTokens": 5, "latencyMs": 1447 }, { "questionId": "q57", "format": "toon", "model": "gpt-4o-mini", "expected": "62", "actual": "54", "correct": false, "inputTokens": 2531, "outputTokens": 2, "latencyMs": 3832 }, { "questionId": "q57", "format": "toon", "model": "claude-haiku-4-5", "expected": "62", "actual": "62", "correct": true, "inputTokens": 2984, "outputTokens": 5, "latencyMs": 1143 }, { "questionId": "q57", "format": "csv", "model": "gpt-4o-mini", "expected": "62", "actual": "66", "correct": false, "inputTokens": 2385, "outputTokens": 2, "latencyMs": 1370 }, { "questionId": "q57", "format": "csv", "model": "claude-haiku-4-5", "expected": "62", "actual": "62", "correct": true, "inputTokens": 2858, "outputTokens": 5, "latencyMs": 1042 }, { "questionId": "q57", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "62", "actual": "54", "correct": false, "inputTokens": 6320, "outputTokens": 2, "latencyMs": 1015 }, { "questionId": "q57", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "62", "actual": "62", "correct": true, "inputTokens": 6367, "outputTokens": 5, "latencyMs": 1395 }, { "questionId": "q57", "format": "yaml", "model": "gpt-4o-mini", "expected": "62", "actual": "54", "correct": false, "inputTokens": 5016, "outputTokens": 2, "latencyMs": 1008 }, { "questionId": "q57", "format": "yaml", "model": "claude-haiku-4-5", "expected": "62", "actual": "62", "correct": true, "inputTokens": 5762, "outputTokens": 5, "latencyMs": 1191 }, { "questionId": "q58", "format": "json", "model": "gpt-4o-mini", "expected": "45", "actual": "38", "correct": false, "inputTokens": 6394, "outputTokens": 2, "latencyMs": 1304 }, { "questionId": "q58", "format": "json", "model": "claude-haiku-4-5", "expected": "45", "actual": "42", "correct": false, "inputTokens": 7872, "outputTokens": 5, "latencyMs": 1386 }, { "questionId": "q58", "format": "toon", "model": "gpt-4o-mini", "expected": "45", "actual": "38", "correct": false, "inputTokens": 2531, "outputTokens": 2, "latencyMs": 1433 }, { "questionId": "q58", "format": "toon", "model": "claude-haiku-4-5", "expected": "45", "actual": "42", "correct": false, "inputTokens": 2984, "outputTokens": 5, "latencyMs": 967 }, { "questionId": "q58", "format": "csv", "model": "gpt-4o-mini", "expected": "45", "actual": "42", "correct": false, "inputTokens": 2385, "outputTokens": 2, "latencyMs": 2469 }, { "questionId": "q58", "format": "csv", "model": "claude-haiku-4-5", "expected": "45", "actual": "42", "correct": false, "inputTokens": 2858, "outputTokens": 5, "latencyMs": 1382 }, { "questionId": "q58", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "45", "actual": "38", "correct": false, "inputTokens": 6320, "outputTokens": 2, "latencyMs": 1658 }, { "questionId": "q58", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "45", "actual": "42", "correct": false, "inputTokens": 6367, "outputTokens": 5, "latencyMs": 1450 }, { "questionId": "q58", "format": "yaml", "model": "gpt-4o-mini", "expected": "45", "actual": "38", "correct": false, "inputTokens": 5016, "outputTokens": 2, "latencyMs": 1428 }, { "questionId": "q58", "format": "yaml", "model": "claude-haiku-4-5", "expected": "45", "actual": "38", "correct": false, "inputTokens": 5762, "outputTokens": 5, "latencyMs": 1144 }, { "questionId": "q59", "format": "json", "model": "gpt-4o-mini", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 1577 }, { "questionId": "q59", "format": "json", "model": "claude-haiku-4-5", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1181 }, { "questionId": "q59", "format": "toon", "model": "gpt-4o-mini", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 1231 }, { "questionId": "q59", "format": "toon", "model": "claude-haiku-4-5", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1407 }, { "questionId": "q59", "format": "csv", "model": "gpt-4o-mini", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1393 }, { "questionId": "q59", "format": "csv", "model": "claude-haiku-4-5", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1534 }, { "questionId": "q59", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 1456 }, { "questionId": "q59", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1933 }, { "questionId": "q59", "format": "yaml", "model": "gpt-4o-mini", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 1472 }, { "questionId": "q59", "format": "yaml", "model": "claude-haiku-4-5", "expected": "96.17", "actual": "96.17", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1224 }, { "questionId": "q60", "format": "json", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 9739, "outputTokens": 3, "latencyMs": 2069 }, { "questionId": "q60", "format": "json", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1172 }, { "questionId": "q60", "format": "toon", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 6013, "outputTokens": 3, "latencyMs": 1236 }, { "questionId": "q60", "format": "toon", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1157 }, { "questionId": "q60", "format": "csv", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 6781, "outputTokens": 3, "latencyMs": 1364 }, { "questionId": "q60", "format": "csv", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1041 }, { "questionId": "q60", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 9158, "outputTokens": 3, "latencyMs": 1478 }, { "questionId": "q60", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1266 }, { "questionId": "q60", "format": "yaml", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 7373, "outputTokens": 3, "latencyMs": 3477 }, { "questionId": "q60", "format": "yaml", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 2630 }, { "questionId": "q61", "format": "json", "model": "gpt-4o-mini", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 1479 }, { "questionId": "q61", "format": "json", "model": "claude-haiku-4-5", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1270 }, { "questionId": "q61", "format": "toon", "model": "gpt-4o-mini", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 1270 }, { "questionId": "q61", "format": "toon", "model": "claude-haiku-4-5", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1342 }, { "questionId": "q61", "format": "csv", "model": "gpt-4o-mini", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1350 }, { "questionId": "q61", "format": "csv", "model": "claude-haiku-4-5", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1205 }, { "questionId": "q61", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 1502 }, { "questionId": "q61", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1571 }, { "questionId": "q61", "format": "yaml", "model": "gpt-4o-mini", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 2013 }, { "questionId": "q61", "format": "yaml", "model": "claude-haiku-4-5", "expected": "599.39", "actual": "599.39", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1428 }, { "questionId": "q62", "format": "json", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 9739, "outputTokens": 2, "latencyMs": 1666 }, { "questionId": "q62", "format": "json", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1549 }, { "questionId": "q62", "format": "toon", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 6013, "outputTokens": 2, "latencyMs": 1033 }, { "questionId": "q62", "format": "toon", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1061 }, { "questionId": "q62", "format": "csv", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 6781, "outputTokens": 2, "latencyMs": 2008 }, { "questionId": "q62", "format": "csv", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1214 }, { "questionId": "q62", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 9158, "outputTokens": 2, "latencyMs": 1321 }, { "questionId": "q62", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1311 }, { "questionId": "q62", "format": "yaml", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 7373, "outputTokens": 2, "latencyMs": 1769 }, { "questionId": "q62", "format": "yaml", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1157 }, { "questionId": "q63", "format": "json", "model": "gpt-4o-mini", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 1213 }, { "questionId": "q63", "format": "json", "model": "claude-haiku-4-5", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1332 }, { "questionId": "q63", "format": "toon", "model": "gpt-4o-mini", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 3749 }, { "questionId": "q63", "format": "toon", "model": "claude-haiku-4-5", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1326 }, { "questionId": "q63", "format": "csv", "model": "gpt-4o-mini", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 947 }, { "questionId": "q63", "format": "csv", "model": "claude-haiku-4-5", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1251 }, { "questionId": "q63", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 1428 }, { "questionId": "q63", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1659 }, { "questionId": "q63", "format": "yaml", "model": "gpt-4o-mini", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 5584 }, { "questionId": "q63", "format": "yaml", "model": "claude-haiku-4-5", "expected": "528.71", "actual": "528.71", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1251 }, { "questionId": "q64", "format": "json", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 9739, "outputTokens": 2, "latencyMs": 2425 }, { "questionId": "q64", "format": "json", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1481 }, { "questionId": "q64", "format": "toon", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 6013, "outputTokens": 2, "latencyMs": 1109 }, { "questionId": "q64", "format": "toon", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1048 }, { "questionId": "q64", "format": "csv", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 6781, "outputTokens": 2, "latencyMs": 1256 }, { "questionId": "q64", "format": "csv", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1117 }, { "questionId": "q64", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 9158, "outputTokens": 2, "latencyMs": 1168 }, { "questionId": "q64", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1504 }, { "questionId": "q64", "format": "yaml", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 7373, "outputTokens": 2, "latencyMs": 1134 }, { "questionId": "q64", "format": "yaml", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1059 }, { "questionId": "q65", "format": "json", "model": "gpt-4o-mini", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 9740, "outputTokens": 5, "latencyMs": 2361 }, { "questionId": "q65", "format": "json", "model": "claude-haiku-4-5", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 11907, "outputTokens": 8, "latencyMs": 1158 }, { "questionId": "q65", "format": "toon", "model": "gpt-4o-mini", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 6014, "outputTokens": 5, "latencyMs": 1493 }, { "questionId": "q65", "format": "toon", "model": "claude-haiku-4-5", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 6993, "outputTokens": 8, "latencyMs": 1068 }, { "questionId": "q65", "format": "csv", "model": "gpt-4o-mini", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 6782, "outputTokens": 5, "latencyMs": 1490 }, { "questionId": "q65", "format": "csv", "model": "claude-haiku-4-5", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 8414, "outputTokens": 8, "latencyMs": 1386 }, { "questionId": "q65", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 9159, "outputTokens": 5, "latencyMs": 1470 }, { "questionId": "q65", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 9289, "outputTokens": 8, "latencyMs": 1189 }, { "questionId": "q65", "format": "yaml", "model": "gpt-4o-mini", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 7374, "outputTokens": 5, "latencyMs": 2824 }, { "questionId": "q65", "format": "yaml", "model": "claude-haiku-4-5", "expected": "1687.82", "actual": "1687.82", "correct": true, "inputTokens": 8385, "outputTokens": 8, "latencyMs": 1565 }, { "questionId": "q66", "format": "json", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 9739, "outputTokens": 3, "latencyMs": 1480 }, { "questionId": "q66", "format": "json", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1354 }, { "questionId": "q66", "format": "toon", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 6013, "outputTokens": 3, "latencyMs": 5334 }, { "questionId": "q66", "format": "toon", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1158 }, { "questionId": "q66", "format": "csv", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 6781, "outputTokens": 3, "latencyMs": 2043 }, { "questionId": "q66", "format": "csv", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1302 }, { "questionId": "q66", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 9158, "outputTokens": 3, "latencyMs": 1006 }, { "questionId": "q66", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1106 }, { "questionId": "q66", "format": "yaml", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 7373, "outputTokens": 3, "latencyMs": 1801 }, { "questionId": "q66", "format": "yaml", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1626 }, { "questionId": "q67", "format": "json", "model": "gpt-4o-mini", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 2107 }, { "questionId": "q67", "format": "json", "model": "claude-haiku-4-5", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1183 }, { "questionId": "q67", "format": "toon", "model": "gpt-4o-mini", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 7091 }, { "questionId": "q67", "format": "toon", "model": "claude-haiku-4-5", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1730 }, { "questionId": "q67", "format": "csv", "model": "gpt-4o-mini", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1222 }, { "questionId": "q67", "format": "csv", "model": "claude-haiku-4-5", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1447 }, { "questionId": "q67", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 10295 }, { "questionId": "q67", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1228 }, { "questionId": "q67", "format": "yaml", "model": "gpt-4o-mini", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 1748 }, { "questionId": "q67", "format": "yaml", "model": "claude-haiku-4-5", "expected": "423.6", "actual": "423.6", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1373 }, { "questionId": "q68", "format": "json", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 9739, "outputTokens": 3, "latencyMs": 3836 }, { "questionId": "q68", "format": "json", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1297 }, { "questionId": "q68", "format": "toon", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 6013, "outputTokens": 3, "latencyMs": 1927 }, { "questionId": "q68", "format": "toon", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1171 }, { "questionId": "q68", "format": "csv", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 6781, "outputTokens": 3, "latencyMs": 1551 }, { "questionId": "q68", "format": "csv", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1273 }, { "questionId": "q68", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 9158, "outputTokens": 3, "latencyMs": 1387 }, { "questionId": "q68", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1237 }, { "questionId": "q68", "format": "yaml", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 7373, "outputTokens": 3, "latencyMs": 1934 }, { "questionId": "q68", "format": "yaml", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1132 }, { "questionId": "q69", "format": "json", "model": "gpt-4o-mini", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 2267 }, { "questionId": "q69", "format": "json", "model": "claude-haiku-4-5", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1772 }, { "questionId": "q69", "format": "toon", "model": "gpt-4o-mini", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 1315 }, { "questionId": "q69", "format": "toon", "model": "claude-haiku-4-5", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1165 }, { "questionId": "q69", "format": "csv", "model": "gpt-4o-mini", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1097 }, { "questionId": "q69", "format": "csv", "model": "claude-haiku-4-5", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1299 }, { "questionId": "q69", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 1779 }, { "questionId": "q69", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 3153 }, { "questionId": "q69", "format": "yaml", "model": "gpt-4o-mini", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 1813 }, { "questionId": "q69", "format": "yaml", "model": "claude-haiku-4-5", "expected": "784.03", "actual": "784.03", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1867 }, { "questionId": "q70", "format": "json", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 9739, "outputTokens": 3, "latencyMs": 1611 }, { "questionId": "q70", "format": "json", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1173 }, { "questionId": "q70", "format": "toon", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 6013, "outputTokens": 3, "latencyMs": 1977 }, { "questionId": "q70", "format": "toon", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1108 }, { "questionId": "q70", "format": "csv", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 6781, "outputTokens": 3, "latencyMs": 1324 }, { "questionId": "q70", "format": "csv", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1225 }, { "questionId": "q70", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 9158, "outputTokens": 3, "latencyMs": 1416 }, { "questionId": "q70", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1200 }, { "questionId": "q70", "format": "yaml", "model": "gpt-4o-mini", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 7373, "outputTokens": 3, "latencyMs": 1259 }, { "questionId": "q70", "format": "yaml", "model": "claude-haiku-4-5", "expected": "shipped", "actual": "shipped", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1433 }, { "questionId": "q71", "format": "json", "model": "gpt-4o-mini", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 1729 }, { "questionId": "q71", "format": "json", "model": "claude-haiku-4-5", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1143 }, { "questionId": "q71", "format": "toon", "model": "gpt-4o-mini", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 1837 }, { "questionId": "q71", "format": "toon", "model": "claude-haiku-4-5", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1147 }, { "questionId": "q71", "format": "csv", "model": "gpt-4o-mini", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1777 }, { "questionId": "q71", "format": "csv", "model": "claude-haiku-4-5", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1295 }, { "questionId": "q71", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 1081 }, { "questionId": "q71", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1692 }, { "questionId": "q71", "format": "yaml", "model": "gpt-4o-mini", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 1661 }, { "questionId": "q71", "format": "yaml", "model": "claude-haiku-4-5", "expected": "645.88", "actual": "645.88", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1475 }, { "questionId": "q72", "format": "json", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 9739, "outputTokens": 2, "latencyMs": 2979 }, { "questionId": "q72", "format": "json", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1187 }, { "questionId": "q72", "format": "toon", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 6013, "outputTokens": 2, "latencyMs": 1620 }, { "questionId": "q72", "format": "toon", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1532 }, { "questionId": "q72", "format": "csv", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 6781, "outputTokens": 2, "latencyMs": 1616 }, { "questionId": "q72", "format": "csv", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1435 }, { "questionId": "q72", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 9158, "outputTokens": 2, "latencyMs": 1190 }, { "questionId": "q72", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1414 }, { "questionId": "q72", "format": "yaml", "model": "gpt-4o-mini", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 7373, "outputTokens": 2, "latencyMs": 2335 }, { "questionId": "q72", "format": "yaml", "model": "claude-haiku-4-5", "expected": "processing", "actual": "processing", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1308 }, { "questionId": "q73", "format": "json", "model": "gpt-4o-mini", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 3359 }, { "questionId": "q73", "format": "json", "model": "claude-haiku-4-5", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1227 }, { "questionId": "q73", "format": "toon", "model": "gpt-4o-mini", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 1439 }, { "questionId": "q73", "format": "toon", "model": "claude-haiku-4-5", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1179 }, { "questionId": "q73", "format": "csv", "model": "gpt-4o-mini", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1064 }, { "questionId": "q73", "format": "csv", "model": "claude-haiku-4-5", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1144 }, { "questionId": "q73", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 1873 }, { "questionId": "q73", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1302 }, { "questionId": "q73", "format": "yaml", "model": "gpt-4o-mini", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 1956 }, { "questionId": "q73", "format": "yaml", "model": "claude-haiku-4-5", "expected": "371.91", "actual": "371.91", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1281 }, { "questionId": "q74", "format": "json", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 9739, "outputTokens": 2, "latencyMs": 1591 }, { "questionId": "q74", "format": "json", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1279 }, { "questionId": "q74", "format": "toon", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 6013, "outputTokens": 2, "latencyMs": 3152 }, { "questionId": "q74", "format": "toon", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1061 }, { "questionId": "q74", "format": "csv", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 6781, "outputTokens": 2, "latencyMs": 1557 }, { "questionId": "q74", "format": "csv", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1313 }, { "questionId": "q74", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 9158, "outputTokens": 2, "latencyMs": 1433 }, { "questionId": "q74", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1812 }, { "questionId": "q74", "format": "yaml", "model": "gpt-4o-mini", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 7373, "outputTokens": 2, "latencyMs": 1024 }, { "questionId": "q74", "format": "yaml", "model": "claude-haiku-4-5", "expected": "pending", "actual": "pending", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1243 }, { "questionId": "q75", "format": "json", "model": "gpt-4o-mini", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 9740, "outputTokens": 3, "latencyMs": 1500 }, { "questionId": "q75", "format": "json", "model": "claude-haiku-4-5", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 11907, "outputTokens": 6, "latencyMs": 1275 }, { "questionId": "q75", "format": "toon", "model": "gpt-4o-mini", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 6014, "outputTokens": 3, "latencyMs": 1841 }, { "questionId": "q75", "format": "toon", "model": "claude-haiku-4-5", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 6993, "outputTokens": 6, "latencyMs": 1080 }, { "questionId": "q75", "format": "csv", "model": "gpt-4o-mini", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 6782, "outputTokens": 3, "latencyMs": 1209 }, { "questionId": "q75", "format": "csv", "model": "claude-haiku-4-5", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 8414, "outputTokens": 6, "latencyMs": 1308 }, { "questionId": "q75", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 9159, "outputTokens": 3, "latencyMs": 1556 }, { "questionId": "q75", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 9289, "outputTokens": 6, "latencyMs": 1240 }, { "questionId": "q75", "format": "yaml", "model": "gpt-4o-mini", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 7374, "outputTokens": 3, "latencyMs": 1254 }, { "questionId": "q75", "format": "yaml", "model": "claude-haiku-4-5", "expected": "1066", "actual": "1066", "correct": true, "inputTokens": 8385, "outputTokens": 6, "latencyMs": 1305 }, { "questionId": "q76", "format": "json", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 9739, "outputTokens": 3, "latencyMs": 2606 }, { "questionId": "q76", "format": "json", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1422 }, { "questionId": "q76", "format": "toon", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 6013, "outputTokens": 3, "latencyMs": 2688 }, { "questionId": "q76", "format": "toon", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1041 }, { "questionId": "q76", "format": "csv", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 6781, "outputTokens": 3, "latencyMs": 3070 }, { "questionId": "q76", "format": "csv", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1167 }, { "questionId": "q76", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 9158, "outputTokens": 3, "latencyMs": 1702 }, { "questionId": "q76", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1182 }, { "questionId": "q76", "format": "yaml", "model": "gpt-4o-mini", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 7373, "outputTokens": 3, "latencyMs": 1740 }, { "questionId": "q76", "format": "yaml", "model": "claude-haiku-4-5", "expected": "cancelled", "actual": "cancelled", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1404 }, { "questionId": "q77", "format": "json", "model": "gpt-4o-mini", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 9740, "outputTokens": 5, "latencyMs": 1596 }, { "questionId": "q77", "format": "json", "model": "claude-haiku-4-5", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 11907, "outputTokens": 8, "latencyMs": 2314 }, { "questionId": "q77", "format": "toon", "model": "gpt-4o-mini", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 6014, "outputTokens": 5, "latencyMs": 1114 }, { "questionId": "q77", "format": "toon", "model": "claude-haiku-4-5", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 6993, "outputTokens": 8, "latencyMs": 1289 }, { "questionId": "q77", "format": "csv", "model": "gpt-4o-mini", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 6782, "outputTokens": 5, "latencyMs": 2428 }, { "questionId": "q77", "format": "csv", "model": "claude-haiku-4-5", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 8414, "outputTokens": 8, "latencyMs": 1325 }, { "questionId": "q77", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 9159, "outputTokens": 5, "latencyMs": 1343 }, { "questionId": "q77", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 9289, "outputTokens": 8, "latencyMs": 1783 }, { "questionId": "q77", "format": "yaml", "model": "gpt-4o-mini", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 7374, "outputTokens": 5, "latencyMs": 918 }, { "questionId": "q77", "format": "yaml", "model": "claude-haiku-4-5", "expected": "1697.4", "actual": "1697.4", "correct": true, "inputTokens": 8385, "outputTokens": 8, "latencyMs": 1308 }, { "questionId": "q78", "format": "json", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 9739, "outputTokens": 3, "latencyMs": 1396 }, { "questionId": "q78", "format": "json", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 11906, "outputTokens": 4, "latencyMs": 1225 }, { "questionId": "q78", "format": "toon", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 6013, "outputTokens": 3, "latencyMs": 2294 }, { "questionId": "q78", "format": "toon", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 6992, "outputTokens": 4, "latencyMs": 1418 }, { "questionId": "q78", "format": "csv", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 6781, "outputTokens": 3, "latencyMs": 1613 }, { "questionId": "q78", "format": "csv", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 8413, "outputTokens": 4, "latencyMs": 1374 }, { "questionId": "q78", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 9158, "outputTokens": 3, "latencyMs": 1341 }, { "questionId": "q78", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 9288, "outputTokens": 4, "latencyMs": 1223 }, { "questionId": "q78", "format": "yaml", "model": "gpt-4o-mini", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 7373, "outputTokens": 3, "latencyMs": 2230 }, { "questionId": "q78", "format": "yaml", "model": "claude-haiku-4-5", "expected": "delivered", "actual": "delivered", "correct": true, "inputTokens": 8384, "outputTokens": 4, "latencyMs": 1425 }, { "questionId": "q79", "format": "json", "model": "gpt-4o-mini", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 1377 }, { "questionId": "q79", "format": "json", "model": "claude-haiku-4-5", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 11907, "outputTokens": 9, "latencyMs": 1550 }, { "questionId": "q79", "format": "toon", "model": "gpt-4o-mini", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 1394 }, { "questionId": "q79", "format": "toon", "model": "claude-haiku-4-5", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 6993, "outputTokens": 9, "latencyMs": 1202 }, { "questionId": "q79", "format": "csv", "model": "gpt-4o-mini", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1435 }, { "questionId": "q79", "format": "csv", "model": "claude-haiku-4-5", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 8414, "outputTokens": 9, "latencyMs": 1277 }, { "questionId": "q79", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 1564 }, { "questionId": "q79", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 9289, "outputTokens": 9, "latencyMs": 1200 }, { "questionId": "q79", "format": "yaml", "model": "gpt-4o-mini", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 1596 }, { "questionId": "q79", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Valerie Braun", "actual": "Valerie Braun", "correct": true, "inputTokens": 8385, "outputTokens": 9, "latencyMs": 1151 }, { "questionId": "q80", "format": "json", "model": "gpt-4o-mini", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 9740, "outputTokens": 5, "latencyMs": 1458 }, { "questionId": "q80", "format": "json", "model": "claude-haiku-4-5", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 11907, "outputTokens": 9, "latencyMs": 1283 }, { "questionId": "q80", "format": "toon", "model": "gpt-4o-mini", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 6014, "outputTokens": 5, "latencyMs": 4702 }, { "questionId": "q80", "format": "toon", "model": "claude-haiku-4-5", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 6993, "outputTokens": 9, "latencyMs": 1360 }, { "questionId": "q80", "format": "csv", "model": "gpt-4o-mini", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 6782, "outputTokens": 5, "latencyMs": 6167 }, { "questionId": "q80", "format": "csv", "model": "claude-haiku-4-5", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 8414, "outputTokens": 9, "latencyMs": 1449 }, { "questionId": "q80", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 9159, "outputTokens": 5, "latencyMs": 6096 }, { "questionId": "q80", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 9289, "outputTokens": 9, "latencyMs": 1194 }, { "questionId": "q80", "format": "yaml", "model": "gpt-4o-mini", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 7374, "outputTokens": 5, "latencyMs": 7357 }, { "questionId": "q80", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Anita Kozey", "actual": "Anita Kozey", "correct": true, "inputTokens": 8385, "outputTokens": 9, "latencyMs": 1213 }, { "questionId": "q81", "format": "json", "model": "gpt-4o-mini", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 9740, "outputTokens": 6, "latencyMs": 2539 }, { "questionId": "q81", "format": "json", "model": "claude-haiku-4-5", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 11907, "outputTokens": 10, "latencyMs": 1532 }, { "questionId": "q81", "format": "toon", "model": "gpt-4o-mini", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 6014, "outputTokens": 6, "latencyMs": 2960 }, { "questionId": "q81", "format": "toon", "model": "claude-haiku-4-5", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 6993, "outputTokens": 10, "latencyMs": 1547 }, { "questionId": "q81", "format": "csv", "model": "gpt-4o-mini", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 6782, "outputTokens": 6, "latencyMs": 1358 }, { "questionId": "q81", "format": "csv", "model": "claude-haiku-4-5", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 8414, "outputTokens": 10, "latencyMs": 1424 }, { "questionId": "q81", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 9159, "outputTokens": 6, "latencyMs": 958 }, { "questionId": "q81", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 9289, "outputTokens": 10, "latencyMs": 1381 }, { "questionId": "q81", "format": "yaml", "model": "gpt-4o-mini", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 7374, "outputTokens": 6, "latencyMs": 1372 }, { "questionId": "q81", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Elmer Kub PhD", "actual": "Elmer Kub PhD", "correct": true, "inputTokens": 8385, "outputTokens": 10, "latencyMs": 1715 }, { "questionId": "q82", "format": "json", "model": "gpt-4o-mini", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 9740, "outputTokens": 5, "latencyMs": 1972 }, { "questionId": "q82", "format": "json", "model": "claude-haiku-4-5", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 11907, "outputTokens": 10, "latencyMs": 1315 }, { "questionId": "q82", "format": "toon", "model": "gpt-4o-mini", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 6014, "outputTokens": 5, "latencyMs": 1634 }, { "questionId": "q82", "format": "toon", "model": "claude-haiku-4-5", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 6993, "outputTokens": 10, "latencyMs": 1264 }, { "questionId": "q82", "format": "csv", "model": "gpt-4o-mini", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 6782, "outputTokens": 5, "latencyMs": 1153 }, { "questionId": "q82", "format": "csv", "model": "claude-haiku-4-5", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 8414, "outputTokens": 10, "latencyMs": 1252 }, { "questionId": "q82", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 9159, "outputTokens": 5, "latencyMs": 1697 }, { "questionId": "q82", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 9289, "outputTokens": 10, "latencyMs": 1198 }, { "questionId": "q82", "format": "yaml", "model": "gpt-4o-mini", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 7374, "outputTokens": 5, "latencyMs": 1854 }, { "questionId": "q82", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Maxine Zemlak", "actual": "Maxine Zemlak", "correct": true, "inputTokens": 8385, "outputTokens": 10, "latencyMs": 1752 }, { "questionId": "q83", "format": "json", "model": "gpt-4o-mini", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 9740, "outputTokens": 5, "latencyMs": 2076 }, { "questionId": "q83", "format": "json", "model": "claude-haiku-4-5", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1398 }, { "questionId": "q83", "format": "toon", "model": "gpt-4o-mini", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 6014, "outputTokens": 5, "latencyMs": 2263 }, { "questionId": "q83", "format": "toon", "model": "claude-haiku-4-5", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 3101 }, { "questionId": "q83", "format": "csv", "model": "gpt-4o-mini", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 6782, "outputTokens": 5, "latencyMs": 1453 }, { "questionId": "q83", "format": "csv", "model": "claude-haiku-4-5", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1265 }, { "questionId": "q83", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 9159, "outputTokens": 5, "latencyMs": 8807 }, { "questionId": "q83", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1097 }, { "questionId": "q83", "format": "yaml", "model": "gpt-4o-mini", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 7374, "outputTokens": 5, "latencyMs": 1667 }, { "questionId": "q83", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Emanuel Littel", "actual": "Emanuel Littel", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1198 }, { "questionId": "q84", "format": "json", "model": "gpt-4o-mini", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 9740, "outputTokens": 3, "latencyMs": 2292 }, { "questionId": "q84", "format": "json", "model": "claude-haiku-4-5", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1202 }, { "questionId": "q84", "format": "toon", "model": "gpt-4o-mini", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 6014, "outputTokens": 3, "latencyMs": 1801 }, { "questionId": "q84", "format": "toon", "model": "claude-haiku-4-5", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1287 }, { "questionId": "q84", "format": "csv", "model": "gpt-4o-mini", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 6782, "outputTokens": 3, "latencyMs": 1340 }, { "questionId": "q84", "format": "csv", "model": "claude-haiku-4-5", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1163 }, { "questionId": "q84", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 9159, "outputTokens": 3, "latencyMs": 2685 }, { "questionId": "q84", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1397 }, { "questionId": "q84", "format": "yaml", "model": "gpt-4o-mini", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 7374, "outputTokens": 3, "latencyMs": 1289 }, { "questionId": "q84", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Andrew Kling", "actual": "Andrew Kling", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1155 }, { "questionId": "q85", "format": "json", "model": "gpt-4o-mini", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 9740, "outputTokens": 6, "latencyMs": 1601 }, { "questionId": "q85", "format": "json", "model": "claude-haiku-4-5", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 11907, "outputTokens": 9, "latencyMs": 1340 }, { "questionId": "q85", "format": "toon", "model": "gpt-4o-mini", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 6014, "outputTokens": 6, "latencyMs": 3525 }, { "questionId": "q85", "format": "toon", "model": "claude-haiku-4-5", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 6993, "outputTokens": 9, "latencyMs": 1710 }, { "questionId": "q85", "format": "csv", "model": "gpt-4o-mini", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 6782, "outputTokens": 6, "latencyMs": 2333 }, { "questionId": "q85", "format": "csv", "model": "claude-haiku-4-5", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 8414, "outputTokens": 9, "latencyMs": 1168 }, { "questionId": "q85", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 9159, "outputTokens": 6, "latencyMs": 1781 }, { "questionId": "q85", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 9289, "outputTokens": 9, "latencyMs": 1552 }, { "questionId": "q85", "format": "yaml", "model": "gpt-4o-mini", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 7374, "outputTokens": 6, "latencyMs": 1584 }, { "questionId": "q85", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Morris O'Hara", "actual": "Morris O'Hara", "correct": true, "inputTokens": 8385, "outputTokens": 9, "latencyMs": 1548 }, { "questionId": "q86", "format": "json", "model": "gpt-4o-mini", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 9740, "outputTokens": 6, "latencyMs": 7230 }, { "questionId": "q86", "format": "json", "model": "claude-haiku-4-5", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 11907, "outputTokens": 9, "latencyMs": 1933 }, { "questionId": "q86", "format": "toon", "model": "gpt-4o-mini", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 6014, "outputTokens": 6, "latencyMs": 1067 }, { "questionId": "q86", "format": "toon", "model": "claude-haiku-4-5", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 6993, "outputTokens": 9, "latencyMs": 1288 }, { "questionId": "q86", "format": "csv", "model": "gpt-4o-mini", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 6782, "outputTokens": 6, "latencyMs": 3954 }, { "questionId": "q86", "format": "csv", "model": "claude-haiku-4-5", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 8414, "outputTokens": 9, "latencyMs": 1314 }, { "questionId": "q86", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 9159, "outputTokens": 6, "latencyMs": 1334 }, { "questionId": "q86", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 9289, "outputTokens": 9, "latencyMs": 2441 }, { "questionId": "q86", "format": "yaml", "model": "gpt-4o-mini", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 7374, "outputTokens": 6, "latencyMs": 1650 }, { "questionId": "q86", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Elijah Franecki", "actual": "Elijah Franecki", "correct": true, "inputTokens": 8385, "outputTokens": 9, "latencyMs": 1495 }, { "questionId": "q87", "format": "json", "model": "gpt-4o-mini", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 9740, "outputTokens": 5, "latencyMs": 1262 }, { "questionId": "q87", "format": "json", "model": "claude-haiku-4-5", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 11907, "outputTokens": 7, "latencyMs": 1367 }, { "questionId": "q87", "format": "toon", "model": "gpt-4o-mini", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 6014, "outputTokens": 5, "latencyMs": 1385 }, { "questionId": "q87", "format": "toon", "model": "claude-haiku-4-5", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 6993, "outputTokens": 7, "latencyMs": 1313 }, { "questionId": "q87", "format": "csv", "model": "gpt-4o-mini", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 6782, "outputTokens": 5, "latencyMs": 1141 }, { "questionId": "q87", "format": "csv", "model": "claude-haiku-4-5", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 8414, "outputTokens": 7, "latencyMs": 1300 }, { "questionId": "q87", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 9159, "outputTokens": 5, "latencyMs": 3347 }, { "questionId": "q87", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 9289, "outputTokens": 7, "latencyMs": 1457 }, { "questionId": "q87", "format": "yaml", "model": "gpt-4o-mini", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 7374, "outputTokens": 5, "latencyMs": 1276 }, { "questionId": "q87", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Malcolm Erdman", "actual": "Malcolm Erdman", "correct": true, "inputTokens": 8385, "outputTokens": 7, "latencyMs": 1211 }, { "questionId": "q88", "format": "json", "model": "gpt-4o-mini", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 9740, "outputTokens": 5, "latencyMs": 1635 }, { "questionId": "q88", "format": "json", "model": "claude-haiku-4-5", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 11907, "outputTokens": 9, "latencyMs": 1582 }, { "questionId": "q88", "format": "toon", "model": "gpt-4o-mini", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 6014, "outputTokens": 5, "latencyMs": 1695 }, { "questionId": "q88", "format": "toon", "model": "claude-haiku-4-5", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 6993, "outputTokens": 9, "latencyMs": 1318 }, { "questionId": "q88", "format": "csv", "model": "gpt-4o-mini", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 6782, "outputTokens": 5, "latencyMs": 936 }, { "questionId": "q88", "format": "csv", "model": "claude-haiku-4-5", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 8414, "outputTokens": 9, "latencyMs": 1204 }, { "questionId": "q88", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 9159, "outputTokens": 5, "latencyMs": 996 }, { "questionId": "q88", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 9289, "outputTokens": 9, "latencyMs": 1261 }, { "questionId": "q88", "format": "yaml", "model": "gpt-4o-mini", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 7374, "outputTokens": 5, "latencyMs": 2276 }, { "questionId": "q88", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Fannie Skiles", "actual": "Fannie Skiles", "correct": true, "inputTokens": 8385, "outputTokens": 9, "latencyMs": 1380 }, { "questionId": "q89", "format": "json", "model": "gpt-4o-mini", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 9740, "outputTokens": 6, "latencyMs": 1451 }, { "questionId": "q89", "format": "json", "model": "claude-haiku-4-5", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 11907, "outputTokens": 10, "latencyMs": 1977 }, { "questionId": "q89", "format": "toon", "model": "gpt-4o-mini", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 6014, "outputTokens": 6, "latencyMs": 1376 }, { "questionId": "q89", "format": "toon", "model": "claude-haiku-4-5", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 6993, "outputTokens": 10, "latencyMs": 1250 }, { "questionId": "q89", "format": "csv", "model": "gpt-4o-mini", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 6782, "outputTokens": 6, "latencyMs": 1273 }, { "questionId": "q89", "format": "csv", "model": "claude-haiku-4-5", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 8414, "outputTokens": 10, "latencyMs": 1359 }, { "questionId": "q89", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 9159, "outputTokens": 6, "latencyMs": 1791 }, { "questionId": "q89", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 9289, "outputTokens": 10, "latencyMs": 1273 }, { "questionId": "q89", "format": "yaml", "model": "gpt-4o-mini", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 7374, "outputTokens": 6, "latencyMs": 2832 }, { "questionId": "q89", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Sonja Emmerich", "actual": "Sonja Emmerich", "correct": true, "inputTokens": 8385, "outputTokens": 10, "latencyMs": 1172 }, { "questionId": "q90", "format": "json", "model": "gpt-4o-mini", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 9740, "outputTokens": 7, "latencyMs": 1491 }, { "questionId": "q90", "format": "json", "model": "claude-haiku-4-5", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 11907, "outputTokens": 10, "latencyMs": 1414 }, { "questionId": "q90", "format": "toon", "model": "gpt-4o-mini", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 6014, "outputTokens": 7, "latencyMs": 1396 }, { "questionId": "q90", "format": "toon", "model": "claude-haiku-4-5", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 6993, "outputTokens": 10, "latencyMs": 1514 }, { "questionId": "q90", "format": "csv", "model": "gpt-4o-mini", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 6782, "outputTokens": 7, "latencyMs": 1573 }, { "questionId": "q90", "format": "csv", "model": "claude-haiku-4-5", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 8414, "outputTokens": 10, "latencyMs": 1284 }, { "questionId": "q90", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 9159, "outputTokens": 7, "latencyMs": 5400 }, { "questionId": "q90", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 9289, "outputTokens": 10, "latencyMs": 1486 }, { "questionId": "q90", "format": "yaml", "model": "gpt-4o-mini", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 7374, "outputTokens": 7, "latencyMs": 1420 }, { "questionId": "q90", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Frank Emmerich DVM", "actual": "Frank Emmerich DVM", "correct": true, "inputTokens": 8385, "outputTokens": 10, "latencyMs": 1410 }, { "questionId": "q91", "format": "json", "model": "gpt-4o-mini", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 1248 }, { "questionId": "q91", "format": "json", "model": "claude-haiku-4-5", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 11907, "outputTokens": 5, "latencyMs": 1177 }, { "questionId": "q91", "format": "toon", "model": "gpt-4o-mini", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 1601 }, { "questionId": "q91", "format": "toon", "model": "claude-haiku-4-5", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 6993, "outputTokens": 5, "latencyMs": 1822 }, { "questionId": "q91", "format": "csv", "model": "gpt-4o-mini", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1103 }, { "questionId": "q91", "format": "csv", "model": "claude-haiku-4-5", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 8414, "outputTokens": 5, "latencyMs": 1247 }, { "questionId": "q91", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 1184 }, { "questionId": "q91", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 9289, "outputTokens": 5, "latencyMs": 1137 }, { "questionId": "q91", "format": "yaml", "model": "gpt-4o-mini", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 949 }, { "questionId": "q91", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Ronald Collins", "actual": "Ronald Collins", "correct": true, "inputTokens": 8385, "outputTokens": 5, "latencyMs": 1143 }, { "questionId": "q92", "format": "json", "model": "gpt-4o-mini", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 9740, "outputTokens": 4, "latencyMs": 1021 }, { "questionId": "q92", "format": "json", "model": "claude-haiku-4-5", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 11907, "outputTokens": 8, "latencyMs": 1301 }, { "questionId": "q92", "format": "toon", "model": "gpt-4o-mini", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 6014, "outputTokens": 4, "latencyMs": 1254 }, { "questionId": "q92", "format": "toon", "model": "claude-haiku-4-5", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 6993, "outputTokens": 8, "latencyMs": 1375 }, { "questionId": "q92", "format": "csv", "model": "gpt-4o-mini", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 6782, "outputTokens": 4, "latencyMs": 1316 }, { "questionId": "q92", "format": "csv", "model": "claude-haiku-4-5", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 8414, "outputTokens": 8, "latencyMs": 2681 }, { "questionId": "q92", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 9159, "outputTokens": 4, "latencyMs": 2427 }, { "questionId": "q92", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 9289, "outputTokens": 8, "latencyMs": 1526 }, { "questionId": "q92", "format": "yaml", "model": "gpt-4o-mini", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 7374, "outputTokens": 4, "latencyMs": 1252 }, { "questionId": "q92", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Jeannie Klein", "actual": "Jeannie Klein", "correct": true, "inputTokens": 8385, "outputTokens": 8, "latencyMs": 1324 }, { "questionId": "q93", "format": "json", "model": "gpt-4o-mini", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 9740, "outputTokens": 5, "latencyMs": 1606 }, { "questionId": "q93", "format": "json", "model": "claude-haiku-4-5", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 11907, "outputTokens": 8, "latencyMs": 1223 }, { "questionId": "q93", "format": "toon", "model": "gpt-4o-mini", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 6014, "outputTokens": 5, "latencyMs": 1965 }, { "questionId": "q93", "format": "toon", "model": "claude-haiku-4-5", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 6993, "outputTokens": 8, "latencyMs": 1300 }, { "questionId": "q93", "format": "csv", "model": "gpt-4o-mini", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 6782, "outputTokens": 5, "latencyMs": 1110 }, { "questionId": "q93", "format": "csv", "model": "claude-haiku-4-5", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 8414, "outputTokens": 8, "latencyMs": 1819 }, { "questionId": "q93", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 9159, "outputTokens": 5, "latencyMs": 1010 }, { "questionId": "q93", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 9289, "outputTokens": 8, "latencyMs": 1224 }, { "questionId": "q93", "format": "yaml", "model": "gpt-4o-mini", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 7374, "outputTokens": 5, "latencyMs": 1430 }, { "questionId": "q93", "format": "yaml", "model": "claude-haiku-4-5", "expected": "Joshua Watsica", "actual": "Joshua Watsica", "correct": true, "inputTokens": 8385, "outputTokens": 8, "latencyMs": 1158 }, { "questionId": "q94", "format": "json", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9736, "outputTokens": 2, "latencyMs": 1352 }, { "questionId": "q94", "format": "json", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 11902, "outputTokens": 5, "latencyMs": 1498 }, { "questionId": "q94", "format": "toon", "model": "gpt-4o-mini", "expected": "10", "actual": "12", "correct": false, "inputTokens": 6010, "outputTokens": 2, "latencyMs": 1249 }, { "questionId": "q94", "format": "toon", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 6988, "outputTokens": 5, "latencyMs": 1080 }, { "questionId": "q94", "format": "csv", "model": "gpt-4o-mini", "expected": "10", "actual": "12", "correct": false, "inputTokens": 6778, "outputTokens": 2, "latencyMs": 1760 }, { "questionId": "q94", "format": "csv", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 8409, "outputTokens": 5, "latencyMs": 1156 }, { "questionId": "q94", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9155, "outputTokens": 2, "latencyMs": 9923 }, { "questionId": "q94", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 9284, "outputTokens": 5, "latencyMs": 1138 }, { "questionId": "q94", "format": "yaml", "model": "gpt-4o-mini", "expected": "10", "actual": "12", "correct": false, "inputTokens": 7370, "outputTokens": 2, "latencyMs": 1070 }, { "questionId": "q94", "format": "yaml", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 8380, "outputTokens": 5, "latencyMs": 1114 }, { "questionId": "q95", "format": "json", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9736, "outputTokens": 2, "latencyMs": 830 }, { "questionId": "q95", "format": "json", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 11902, "outputTokens": 5, "latencyMs": 1085 }, { "questionId": "q95", "format": "toon", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 6010, "outputTokens": 2, "latencyMs": 2362 }, { "questionId": "q95", "format": "toon", "model": "claude-haiku-4-5", "expected": "10", "actual": "7", "correct": false, "inputTokens": 6988, "outputTokens": 5, "latencyMs": 1198 }, { "questionId": "q95", "format": "csv", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 6778, "outputTokens": 2, "latencyMs": 1630 }, { "questionId": "q95", "format": "csv", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 8409, "outputTokens": 5, "latencyMs": 1219 }, { "questionId": "q95", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9155, "outputTokens": 2, "latencyMs": 2666 }, { "questionId": "q95", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 9284, "outputTokens": 5, "latencyMs": 1044 }, { "questionId": "q95", "format": "yaml", "model": "gpt-4o-mini", "expected": "10", "actual": "12", "correct": false, "inputTokens": 7370, "outputTokens": 2, "latencyMs": 2187 }, { "questionId": "q95", "format": "yaml", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 8380, "outputTokens": 5, "latencyMs": 1313 }, { "questionId": "q96", "format": "json", "model": "gpt-4o-mini", "expected": "10", "actual": "20", "correct": false, "inputTokens": 9737, "outputTokens": 2, "latencyMs": 1087 }, { "questionId": "q96", "format": "json", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 11902, "outputTokens": 5, "latencyMs": 1292 }, { "questionId": "q96", "format": "toon", "model": "gpt-4o-mini", "expected": "10", "actual": "15", "correct": false, "inputTokens": 6011, "outputTokens": 2, "latencyMs": 1979 }, { "questionId": "q96", "format": "toon", "model": "claude-haiku-4-5", "expected": "10", "actual": "7", "correct": false, "inputTokens": 6988, "outputTokens": 5, "latencyMs": 1095 }, { "questionId": "q96", "format": "csv", "model": "gpt-4o-mini", "expected": "10", "actual": "15", "correct": false, "inputTokens": 6779, "outputTokens": 2, "latencyMs": 1385 }, { "questionId": "q96", "format": "csv", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 8409, "outputTokens": 5, "latencyMs": 1507 }, { "questionId": "q96", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9156, "outputTokens": 2, "latencyMs": 1579 }, { "questionId": "q96", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 9284, "outputTokens": 5, "latencyMs": 1365 }, { "questionId": "q96", "format": "yaml", "model": "gpt-4o-mini", "expected": "10", "actual": "20", "correct": false, "inputTokens": 7371, "outputTokens": 2, "latencyMs": 1661 }, { "questionId": "q96", "format": "yaml", "model": "claude-haiku-4-5", "expected": "10", "actual": "7", "correct": false, "inputTokens": 8380, "outputTokens": 5, "latencyMs": 1423 }, { "questionId": "q97", "format": "json", "model": "gpt-4o-mini", "expected": "10", "actual": "15", "correct": false, "inputTokens": 9737, "outputTokens": 2, "latencyMs": 1815 }, { "questionId": "q97", "format": "json", "model": "claude-haiku-4-5", "expected": "10", "actual": "10", "correct": true, "inputTokens": 11902, "outputTokens": 5, "latencyMs": 1345 }, { "questionId": "q97", "format": "toon", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 6011, "outputTokens": 2, "latencyMs": 2193 }, { "questionId": "q97", "format": "toon", "model": "claude-haiku-4-5", "expected": "10", "actual": "10", "correct": true, "inputTokens": 6988, "outputTokens": 5, "latencyMs": 1417 }, { "questionId": "q97", "format": "csv", "model": "gpt-4o-mini", "expected": "10", "actual": "15", "correct": false, "inputTokens": 6779, "outputTokens": 2, "latencyMs": 1721 }, { "questionId": "q97", "format": "csv", "model": "claude-haiku-4-5", "expected": "10", "actual": "10", "correct": true, "inputTokens": 8409, "outputTokens": 5, "latencyMs": 1114 }, { "questionId": "q97", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "10", "actual": "15", "correct": false, "inputTokens": 9156, "outputTokens": 2, "latencyMs": 2208 }, { "questionId": "q97", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9284, "outputTokens": 5, "latencyMs": 1895 }, { "questionId": "q97", "format": "yaml", "model": "gpt-4o-mini", "expected": "10", "actual": "15", "correct": false, "inputTokens": 7371, "outputTokens": 2, "latencyMs": 1287 }, { "questionId": "q97", "format": "yaml", "model": "claude-haiku-4-5", "expected": "10", "actual": "10", "correct": true, "inputTokens": 8380, "outputTokens": 5, "latencyMs": 1281 }, { "questionId": "q98", "format": "json", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9737, "outputTokens": 2, "latencyMs": 1387 }, { "questionId": "q98", "format": "json", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 11902, "outputTokens": 5, "latencyMs": 1243 }, { "questionId": "q98", "format": "toon", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 6011, "outputTokens": 2, "latencyMs": 1284 }, { "questionId": "q98", "format": "toon", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 6988, "outputTokens": 5, "latencyMs": 1161 }, { "questionId": "q98", "format": "csv", "model": "gpt-4o-mini", "expected": "10", "actual": "15", "correct": false, "inputTokens": 6779, "outputTokens": 2, "latencyMs": 10406 }, { "questionId": "q98", "format": "csv", "model": "claude-haiku-4-5", "expected": "10", "actual": "10", "correct": true, "inputTokens": 8409, "outputTokens": 5, "latencyMs": 1335 }, { "questionId": "q98", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9156, "outputTokens": 2, "latencyMs": 1517 }, { "questionId": "q98", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "10", "actual": "10", "correct": true, "inputTokens": 9284, "outputTokens": 5, "latencyMs": 1702 }, { "questionId": "q98", "format": "yaml", "model": "gpt-4o-mini", "expected": "10", "actual": "10", "correct": true, "inputTokens": 7371, "outputTokens": 2, "latencyMs": 1676 }, { "questionId": "q98", "format": "yaml", "model": "claude-haiku-4-5", "expected": "10", "actual": "8", "correct": false, "inputTokens": 8380, "outputTokens": 5, "latencyMs": 1218 }, { "questionId": "q99", "format": "json", "model": "gpt-4o-mini", "expected": "42342.25", "actual": "$50,000.00", "correct": false, "inputTokens": 9737, "outputTokens": 7, "latencyMs": 1407 }, { "questionId": "q99", "format": "json", "model": "claude-haiku-4-5", "expected": "42342.25", "actual": "50,847.47", "correct": false, "inputTokens": 11902, "outputTokens": 9, "latencyMs": 1443 }, { "questionId": "q99", "format": "toon", "model": "gpt-4o-mini", "expected": "42342.25", "actual": "Total revenue across all orders is 42,195.36.", "correct": false, "inputTokens": 6011, "outputTokens": 14, "latencyMs": 1150 }, { "questionId": "q99", "format": "toon", "model": "claude-haiku-4-5", "expected": "42342.25", "actual": "41,847.47", "correct": false, "inputTokens": 6988, "outputTokens": 9, "latencyMs": 1774 }, { "questionId": "q99", "format": "csv", "model": "gpt-4o-mini", "expected": "42342.25", "actual": "$32,186.73", "correct": false, "inputTokens": 6779, "outputTokens": 7, "latencyMs": 2654 }, { "questionId": "q99", "format": "csv", "model": "claude-haiku-4-5", "expected": "42342.25", "actual": "48,847.47", "correct": false, "inputTokens": 8409, "outputTokens": 9, "latencyMs": 1386 }, { "questionId": "q99", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "42342.25", "actual": "$34,186.73", "correct": false, "inputTokens": 9156, "outputTokens": 7, "latencyMs": 1506 }, { "questionId": "q99", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "42342.25", "actual": "48,847.47", "correct": false, "inputTokens": 9284, "outputTokens": 9, "latencyMs": 1509 }, { "questionId": "q99", "format": "yaml", "model": "gpt-4o-mini", "expected": "42342.25", "actual": "Total revenue across all orders is 48780.73.", "correct": false, "inputTokens": 7371, "outputTokens": 13, "latencyMs": 1700 }, { "questionId": "q99", "format": "yaml", "model": "claude-haiku-4-5", "expected": "42342.25", "actual": "47,847.47", "correct": false, "inputTokens": 8380, "outputTokens": 9, "latencyMs": 1230 }, { "questionId": "q100", "format": "json", "model": "gpt-4o-mini", "expected": "44", "actual": "36", "correct": false, "inputTokens": 9739, "outputTokens": 2, "latencyMs": 1725 }, { "questionId": "q100", "format": "json", "model": "claude-haiku-4-5", "expected": "44", "actual": "48", "correct": false, "inputTokens": 11904, "outputTokens": 5, "latencyMs": 1377 }, { "questionId": "q100", "format": "toon", "model": "gpt-4o-mini", "expected": "44", "actual": "34", "correct": false, "inputTokens": 6013, "outputTokens": 2, "latencyMs": 1399 }, { "questionId": "q100", "format": "toon", "model": "claude-haiku-4-5", "expected": "44", "actual": "47", "correct": false, "inputTokens": 6990, "outputTokens": 5, "latencyMs": 1094 }, { "questionId": "q100", "format": "csv", "model": "gpt-4o-mini", "expected": "44", "actual": "34", "correct": false, "inputTokens": 6781, "outputTokens": 2, "latencyMs": 1617 }, { "questionId": "q100", "format": "csv", "model": "claude-haiku-4-5", "expected": "44", "actual": "47", "correct": false, "inputTokens": 8411, "outputTokens": 5, "latencyMs": 1344 }, { "questionId": "q100", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "44", "actual": "36", "correct": false, "inputTokens": 9158, "outputTokens": 2, "latencyMs": 2396 }, { "questionId": "q100", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "44", "actual": "48", "correct": false, "inputTokens": 9286, "outputTokens": 5, "latencyMs": 1145 }, { "questionId": "q100", "format": "yaml", "model": "gpt-4o-mini", "expected": "44", "actual": "36", "correct": false, "inputTokens": 7373, "outputTokens": 2, "latencyMs": 951 }, { "questionId": "q100", "format": "yaml", "model": "claude-haiku-4-5", "expected": "44", "actual": "45", "correct": false, "inputTokens": 8382, "outputTokens": 5, "latencyMs": 1311 }, { "questionId": "q101", "format": "json", "model": "gpt-4o-mini", "expected": "39", "actual": "34", "correct": false, "inputTokens": 9739, "outputTokens": 2, "latencyMs": 866 }, { "questionId": "q101", "format": "json", "model": "claude-haiku-4-5", "expected": "39", "actual": "38", "correct": false, "inputTokens": 11904, "outputTokens": 5, "latencyMs": 1964 }, { "questionId": "q101", "format": "toon", "model": "gpt-4o-mini", "expected": "39", "actual": "30", "correct": false, "inputTokens": 6013, "outputTokens": 2, "latencyMs": 1994 }, { "questionId": "q101", "format": "toon", "model": "claude-haiku-4-5", "expected": "39", "actual": "38", "correct": false, "inputTokens": 6990, "outputTokens": 5, "latencyMs": 1277 }, { "questionId": "q101", "format": "csv", "model": "gpt-4o-mini", "expected": "39", "actual": "32", "correct": false, "inputTokens": 6781, "outputTokens": 2, "latencyMs": 1884 }, { "questionId": "q101", "format": "csv", "model": "claude-haiku-4-5", "expected": "39", "actual": "38", "correct": false, "inputTokens": 8411, "outputTokens": 5, "latencyMs": 1282 }, { "questionId": "q101", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "39", "actual": "32", "correct": false, "inputTokens": 9158, "outputTokens": 2, "latencyMs": 1761 }, { "questionId": "q101", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "39", "actual": "38", "correct": false, "inputTokens": 9286, "outputTokens": 5, "latencyMs": 1250 }, { "questionId": "q101", "format": "yaml", "model": "gpt-4o-mini", "expected": "39", "actual": "32", "correct": false, "inputTokens": 7373, "outputTokens": 2, "latencyMs": 1316 }, { "questionId": "q101", "format": "yaml", "model": "claude-haiku-4-5", "expected": "39", "actual": "38", "correct": false, "inputTokens": 8382, "outputTokens": 5, "latencyMs": 1373 }, { "questionId": "q102", "format": "json", "model": "gpt-4o-mini", "expected": "32", "actual": "27", "correct": false, "inputTokens": 9739, "outputTokens": 2, "latencyMs": 1389 }, { "questionId": "q102", "format": "json", "model": "claude-haiku-4-5", "expected": "32", "actual": "28", "correct": false, "inputTokens": 11904, "outputTokens": 5, "latencyMs": 1215 }, { "questionId": "q102", "format": "toon", "model": "gpt-4o-mini", "expected": "32", "actual": "24", "correct": false, "inputTokens": 6013, "outputTokens": 2, "latencyMs": 1034 }, { "questionId": "q102", "format": "toon", "model": "claude-haiku-4-5", "expected": "32", "actual": "26", "correct": false, "inputTokens": 6990, "outputTokens": 5, "latencyMs": 1063 }, { "questionId": "q102", "format": "csv", "model": "gpt-4o-mini", "expected": "32", "actual": "25", "correct": false, "inputTokens": 6781, "outputTokens": 2, "latencyMs": 7312 }, { "questionId": "q102", "format": "csv", "model": "claude-haiku-4-5", "expected": "32", "actual": "28", "correct": false, "inputTokens": 8411, "outputTokens": 5, "latencyMs": 1387 }, { "questionId": "q102", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "32", "actual": "27", "correct": false, "inputTokens": 9158, "outputTokens": 2, "latencyMs": 1488 }, { "questionId": "q102", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "32", "actual": "28", "correct": false, "inputTokens": 9286, "outputTokens": 5, "latencyMs": 1268 }, { "questionId": "q102", "format": "yaml", "model": "gpt-4o-mini", "expected": "32", "actual": "27", "correct": false, "inputTokens": 7373, "outputTokens": 2, "latencyMs": 1274 }, { "questionId": "q102", "format": "yaml", "model": "claude-haiku-4-5", "expected": "32", "actual": "26", "correct": false, "inputTokens": 8382, "outputTokens": 5, "latencyMs": 1354 }, { "questionId": "q103", "format": "json", "model": "gpt-4o-mini", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 1330 }, { "questionId": "q103", "format": "json", "model": "claude-haiku-4-5", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1437 }, { "questionId": "q103", "format": "toon", "model": "gpt-4o-mini", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 1341 }, { "questionId": "q103", "format": "toon", "model": "claude-haiku-4-5", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1231 }, { "questionId": "q103", "format": "csv", "model": "gpt-4o-mini", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 2515 }, { "questionId": "q103", "format": "csv", "model": "claude-haiku-4-5", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1162 }, { "questionId": "q103", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 868 }, { "questionId": "q103", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1149 }, { "questionId": "q103", "format": "yaml", "model": "gpt-4o-mini", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 1183 }, { "questionId": "q103", "format": "yaml", "model": "claude-haiku-4-5", "expected": "6975", "actual": "6975", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1119 }, { "questionId": "q104", "format": "json", "model": "gpt-4o-mini", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 1273 }, { "questionId": "q104", "format": "json", "model": "claude-haiku-4-5", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1371 }, { "questionId": "q104", "format": "toon", "model": "gpt-4o-mini", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 2052 }, { "questionId": "q104", "format": "toon", "model": "claude-haiku-4-5", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 997 }, { "questionId": "q104", "format": "csv", "model": "gpt-4o-mini", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 1152 }, { "questionId": "q104", "format": "csv", "model": "claude-haiku-4-5", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 1188 }, { "questionId": "q104", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 1259 }, { "questionId": "q104", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1239 }, { "questionId": "q104", "format": "yaml", "model": "gpt-4o-mini", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 1096 }, { "questionId": "q104", "format": "yaml", "model": "claude-haiku-4-5", "expected": "6686.23", "actual": "6686.23", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 1247 }, { "questionId": "q105", "format": "json", "model": "gpt-4o-mini", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 1354 }, { "questionId": "q105", "format": "json", "model": "claude-haiku-4-5", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1083 }, { "questionId": "q105", "format": "toon", "model": "gpt-4o-mini", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 869 }, { "questionId": "q105", "format": "toon", "model": "claude-haiku-4-5", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1051 }, { "questionId": "q105", "format": "csv", "model": "gpt-4o-mini", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 1528 }, { "questionId": "q105", "format": "csv", "model": "claude-haiku-4-5", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1126 }, { "questionId": "q105", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 1136 }, { "questionId": "q105", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1121 }, { "questionId": "q105", "format": "yaml", "model": "gpt-4o-mini", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 1217 }, { "questionId": "q105", "format": "yaml", "model": "claude-haiku-4-5", "expected": "7500", "actual": "7500", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1099 }, { "questionId": "q106", "format": "json", "model": "gpt-4o-mini", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 1416 }, { "questionId": "q106", "format": "json", "model": "claude-haiku-4-5", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1526 }, { "questionId": "q106", "format": "toon", "model": "gpt-4o-mini", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 1350 }, { "questionId": "q106", "format": "toon", "model": "claude-haiku-4-5", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 1330 }, { "questionId": "q106", "format": "csv", "model": "gpt-4o-mini", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 2337 }, { "questionId": "q106", "format": "csv", "model": "claude-haiku-4-5", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 1171 }, { "questionId": "q106", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 3128 }, { "questionId": "q106", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1151 }, { "questionId": "q106", "format": "yaml", "model": "gpt-4o-mini", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 1988 }, { "questionId": "q106", "format": "yaml", "model": "claude-haiku-4-5", "expected": "14297.05", "actual": "14297.05", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 1166 }, { "questionId": "q107", "format": "json", "model": "gpt-4o-mini", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 2217 }, { "questionId": "q107", "format": "json", "model": "claude-haiku-4-5", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1114 }, { "questionId": "q107", "format": "toon", "model": "gpt-4o-mini", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 1360 }, { "questionId": "q107", "format": "toon", "model": "claude-haiku-4-5", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1079 }, { "questionId": "q107", "format": "csv", "model": "gpt-4o-mini", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 1951 }, { "questionId": "q107", "format": "csv", "model": "claude-haiku-4-5", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1173 }, { "questionId": "q107", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 1076 }, { "questionId": "q107", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1098 }, { "questionId": "q107", "format": "yaml", "model": "gpt-4o-mini", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 1101 }, { "questionId": "q107", "format": "yaml", "model": "claude-haiku-4-5", "expected": "6692", "actual": "6692", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1254 }, { "questionId": "q108", "format": "json", "model": "gpt-4o-mini", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 2041 }, { "questionId": "q108", "format": "json", "model": "claude-haiku-4-5", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1405 }, { "questionId": "q108", "format": "toon", "model": "gpt-4o-mini", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 1170 }, { "questionId": "q108", "format": "toon", "model": "claude-haiku-4-5", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 1161 }, { "questionId": "q108", "format": "csv", "model": "gpt-4o-mini", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 1326 }, { "questionId": "q108", "format": "csv", "model": "claude-haiku-4-5", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 1259 }, { "questionId": "q108", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 3006 }, { "questionId": "q108", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1461 }, { "questionId": "q108", "format": "yaml", "model": "gpt-4o-mini", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 3824 }, { "questionId": "q108", "format": "yaml", "model": "claude-haiku-4-5", "expected": "9302.76", "actual": "9302.76", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 1391 }, { "questionId": "q109", "format": "json", "model": "gpt-4o-mini", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 1091 }, { "questionId": "q109", "format": "json", "model": "claude-haiku-4-5", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1188 }, { "questionId": "q109", "format": "toon", "model": "gpt-4o-mini", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 1450 }, { "questionId": "q109", "format": "toon", "model": "claude-haiku-4-5", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1614 }, { "questionId": "q109", "format": "csv", "model": "gpt-4o-mini", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 1642 }, { "questionId": "q109", "format": "csv", "model": "claude-haiku-4-5", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1311 }, { "questionId": "q109", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 1201 }, { "questionId": "q109", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1261 }, { "questionId": "q109", "format": "yaml", "model": "gpt-4o-mini", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 856 }, { "questionId": "q109", "format": "yaml", "model": "claude-haiku-4-5", "expected": "3285", "actual": "3285", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 980 }, { "questionId": "q110", "format": "json", "model": "gpt-4o-mini", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 3090 }, { "questionId": "q110", "format": "json", "model": "claude-haiku-4-5", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1123 }, { "questionId": "q110", "format": "toon", "model": "gpt-4o-mini", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 2911 }, { "questionId": "q110", "format": "toon", "model": "claude-haiku-4-5", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 979 }, { "questionId": "q110", "format": "csv", "model": "gpt-4o-mini", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 1118 }, { "questionId": "q110", "format": "csv", "model": "claude-haiku-4-5", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 943 }, { "questionId": "q110", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 2639 }, { "questionId": "q110", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1187 }, { "questionId": "q110", "format": "yaml", "model": "gpt-4o-mini", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 2402 }, { "questionId": "q110", "format": "yaml", "model": "claude-haiku-4-5", "expected": "3826.93", "actual": "3826.93", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 1723 }, { "questionId": "q111", "format": "json", "model": "gpt-4o-mini", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 2401 }, { "questionId": "q111", "format": "json", "model": "claude-haiku-4-5", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1117 }, { "questionId": "q111", "format": "toon", "model": "gpt-4o-mini", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 1568 }, { "questionId": "q111", "format": "toon", "model": "claude-haiku-4-5", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1132 }, { "questionId": "q111", "format": "csv", "model": "gpt-4o-mini", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 1478 }, { "questionId": "q111", "format": "csv", "model": "claude-haiku-4-5", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1831 }, { "questionId": "q111", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 1631 }, { "questionId": "q111", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1371 }, { "questionId": "q111", "format": "yaml", "model": "gpt-4o-mini", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 1209 }, { "questionId": "q111", "format": "yaml", "model": "claude-haiku-4-5", "expected": "6191", "actual": "6191", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1411 }, { "questionId": "q112", "format": "json", "model": "gpt-4o-mini", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 1773 }, { "questionId": "q112", "format": "json", "model": "claude-haiku-4-5", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1090 }, { "questionId": "q112", "format": "toon", "model": "gpt-4o-mini", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 1354 }, { "questionId": "q112", "format": "toon", "model": "claude-haiku-4-5", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 1095 }, { "questionId": "q112", "format": "csv", "model": "gpt-4o-mini", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 1135 }, { "questionId": "q112", "format": "csv", "model": "claude-haiku-4-5", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 976 }, { "questionId": "q112", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 1311 }, { "questionId": "q112", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1287 }, { "questionId": "q112", "format": "yaml", "model": "gpt-4o-mini", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 1288 }, { "questionId": "q112", "format": "yaml", "model": "claude-haiku-4-5", "expected": "1854.66", "actual": "1854.66", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 1157 }, { "questionId": "q113", "format": "json", "model": "gpt-4o-mini", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 1328 }, { "questionId": "q113", "format": "json", "model": "claude-haiku-4-5", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1068 }, { "questionId": "q113", "format": "toon", "model": "gpt-4o-mini", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 1020 }, { "questionId": "q113", "format": "toon", "model": "claude-haiku-4-5", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1069 }, { "questionId": "q113", "format": "csv", "model": "gpt-4o-mini", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 968 }, { "questionId": "q113", "format": "csv", "model": "claude-haiku-4-5", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1436 }, { "questionId": "q113", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 1171 }, { "questionId": "q113", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1273 }, { "questionId": "q113", "format": "yaml", "model": "gpt-4o-mini", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 1788 }, { "questionId": "q113", "format": "yaml", "model": "claude-haiku-4-5", "expected": "4696", "actual": "4696", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1050 }, { "questionId": "q114", "format": "json", "model": "gpt-4o-mini", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 1414 }, { "questionId": "q114", "format": "json", "model": "claude-haiku-4-5", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1192 }, { "questionId": "q114", "format": "toon", "model": "gpt-4o-mini", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 893 }, { "questionId": "q114", "format": "toon", "model": "claude-haiku-4-5", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 1065 }, { "questionId": "q114", "format": "csv", "model": "gpt-4o-mini", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 1155 }, { "questionId": "q114", "format": "csv", "model": "claude-haiku-4-5", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 1842 }, { "questionId": "q114", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 2740 }, { "questionId": "q114", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1295 }, { "questionId": "q114", "format": "yaml", "model": "gpt-4o-mini", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 1053 }, { "questionId": "q114", "format": "yaml", "model": "claude-haiku-4-5", "expected": "4211.6", "actual": "4211.6", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 1118 }, { "questionId": "q115", "format": "json", "model": "gpt-4o-mini", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 1452 }, { "questionId": "q115", "format": "json", "model": "claude-haiku-4-5", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1272 }, { "questionId": "q115", "format": "toon", "model": "gpt-4o-mini", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 1039 }, { "questionId": "q115", "format": "toon", "model": "claude-haiku-4-5", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1155 }, { "questionId": "q115", "format": "csv", "model": "gpt-4o-mini", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 796 }, { "questionId": "q115", "format": "csv", "model": "claude-haiku-4-5", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1048 }, { "questionId": "q115", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 2282 }, { "questionId": "q115", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1592 }, { "questionId": "q115", "format": "yaml", "model": "gpt-4o-mini", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 2691 }, { "questionId": "q115", "format": "yaml", "model": "claude-haiku-4-5", "expected": "6196", "actual": "6196", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1126 }, { "questionId": "q116", "format": "json", "model": "gpt-4o-mini", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 1288 }, { "questionId": "q116", "format": "json", "model": "claude-haiku-4-5", "expected": "6105.3", "actual": "6105.30", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 991 }, { "questionId": "q116", "format": "toon", "model": "gpt-4o-mini", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 1257 }, { "questionId": "q116", "format": "toon", "model": "claude-haiku-4-5", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 1004 }, { "questionId": "q116", "format": "csv", "model": "gpt-4o-mini", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 1620 }, { "questionId": "q116", "format": "csv", "model": "claude-haiku-4-5", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 991 }, { "questionId": "q116", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 1048 }, { "questionId": "q116", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1189 }, { "questionId": "q116", "format": "yaml", "model": "gpt-4o-mini", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 3282 }, { "questionId": "q116", "format": "yaml", "model": "claude-haiku-4-5", "expected": "6105.3", "actual": "6105.3", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 985 }, { "questionId": "q117", "format": "json", "model": "gpt-4o-mini", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 871 }, { "questionId": "q117", "format": "json", "model": "claude-haiku-4-5", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1042 }, { "questionId": "q117", "format": "toon", "model": "gpt-4o-mini", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 999 }, { "questionId": "q117", "format": "toon", "model": "claude-haiku-4-5", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1111 }, { "questionId": "q117", "format": "csv", "model": "gpt-4o-mini", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 1132 }, { "questionId": "q117", "format": "csv", "model": "claude-haiku-4-5", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1004 }, { "questionId": "q117", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 1162 }, { "questionId": "q117", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1271 }, { "questionId": "q117", "format": "yaml", "model": "gpt-4o-mini", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 961 }, { "questionId": "q117", "format": "yaml", "model": "claude-haiku-4-5", "expected": "6528", "actual": "6528", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1289 }, { "questionId": "q118", "format": "json", "model": "gpt-4o-mini", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 1634 }, { "questionId": "q118", "format": "json", "model": "claude-haiku-4-5", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1198 }, { "questionId": "q118", "format": "toon", "model": "gpt-4o-mini", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 2678 }, { "questionId": "q118", "format": "toon", "model": "claude-haiku-4-5", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 1155 }, { "questionId": "q118", "format": "csv", "model": "gpt-4o-mini", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 1104 }, { "questionId": "q118", "format": "csv", "model": "claude-haiku-4-5", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 1109 }, { "questionId": "q118", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 3756 }, { "questionId": "q118", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1082 }, { "questionId": "q118", "format": "yaml", "model": "gpt-4o-mini", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 1451 }, { "questionId": "q118", "format": "yaml", "model": "claude-haiku-4-5", "expected": "1136.09", "actual": "1136.09", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 1730 }, { "questionId": "q119", "format": "json", "model": "gpt-4o-mini", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 1327 }, { "questionId": "q119", "format": "json", "model": "claude-haiku-4-5", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 1282 }, { "questionId": "q119", "format": "toon", "model": "gpt-4o-mini", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 1368 }, { "questionId": "q119", "format": "toon", "model": "claude-haiku-4-5", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1487 }, { "questionId": "q119", "format": "csv", "model": "gpt-4o-mini", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 2752 }, { "questionId": "q119", "format": "csv", "model": "claude-haiku-4-5", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 909 }, { "questionId": "q119", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 3502 }, { "questionId": "q119", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1212 }, { "questionId": "q119", "format": "yaml", "model": "gpt-4o-mini", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 1218 }, { "questionId": "q119", "format": "yaml", "model": "claude-haiku-4-5", "expected": "4689", "actual": "4689", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1064 }, { "questionId": "q120", "format": "json", "model": "gpt-4o-mini", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 2777 }, { "questionId": "q120", "format": "json", "model": "claude-haiku-4-5", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1246 }, { "questionId": "q120", "format": "toon", "model": "gpt-4o-mini", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 1424 }, { "questionId": "q120", "format": "toon", "model": "claude-haiku-4-5", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 1074 }, { "questionId": "q120", "format": "csv", "model": "gpt-4o-mini", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 2803 }, { "questionId": "q120", "format": "csv", "model": "claude-haiku-4-5", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 1107 }, { "questionId": "q120", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 1066 }, { "questionId": "q120", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1325 }, { "questionId": "q120", "format": "yaml", "model": "gpt-4o-mini", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 1330 }, { "questionId": "q120", "format": "yaml", "model": "claude-haiku-4-5", "expected": "2637.73", "actual": "2637.73", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 1192 }, { "questionId": "q121", "format": "json", "model": "gpt-4o-mini", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 3713, "outputTokens": 3, "latencyMs": 1139 }, { "questionId": "q121", "format": "json", "model": "claude-haiku-4-5", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 4080, "outputTokens": 6, "latencyMs": 994 }, { "questionId": "q121", "format": "toon", "model": "gpt-4o-mini", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 1564, "outputTokens": 3, "latencyMs": 1309 }, { "questionId": "q121", "format": "toon", "model": "claude-haiku-4-5", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 1509, "outputTokens": 6, "latencyMs": 1184 }, { "questionId": "q121", "format": "csv", "model": "gpt-4o-mini", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 1442, "outputTokens": 3, "latencyMs": 1182 }, { "questionId": "q121", "format": "csv", "model": "claude-haiku-4-5", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 1445, "outputTokens": 6, "latencyMs": 1381 }, { "questionId": "q121", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 3830, "outputTokens": 3, "latencyMs": 1103 }, { "questionId": "q121", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 3415, "outputTokens": 6, "latencyMs": 1220 }, { "questionId": "q121", "format": "yaml", "model": "gpt-4o-mini", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 2986, "outputTokens": 3, "latencyMs": 1169 }, { "questionId": "q121", "format": "yaml", "model": "claude-haiku-4-5", "expected": "5685", "actual": "5685", "correct": true, "inputTokens": 3110, "outputTokens": 6, "latencyMs": 1208 }, { "questionId": "q122", "format": "json", "model": "gpt-4o-mini", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 3712, "outputTokens": 5, "latencyMs": 1037 }, { "questionId": "q122", "format": "json", "model": "claude-haiku-4-5", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 4079, "outputTokens": 8, "latencyMs": 1278 }, { "questionId": "q122", "format": "toon", "model": "gpt-4o-mini", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 1563, "outputTokens": 5, "latencyMs": 1441 }, { "questionId": "q122", "format": "toon", "model": "claude-haiku-4-5", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 1508, "outputTokens": 8, "latencyMs": 1204 }, { "questionId": "q122", "format": "csv", "model": "gpt-4o-mini", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 1441, "outputTokens": 5, "latencyMs": 1782 }, { "questionId": "q122", "format": "csv", "model": "claude-haiku-4-5", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 1444, "outputTokens": 8, "latencyMs": 1088 }, { "questionId": "q122", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 3829, "outputTokens": 5, "latencyMs": 1447 }, { "questionId": "q122", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 3414, "outputTokens": 8, "latencyMs": 1356 }, { "questionId": "q122", "format": "yaml", "model": "gpt-4o-mini", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 2985, "outputTokens": 5, "latencyMs": 1309 }, { "questionId": "q122", "format": "yaml", "model": "claude-haiku-4-5", "expected": "3421.06", "actual": "3421.06", "correct": true, "inputTokens": 3109, "outputTokens": 8, "latencyMs": 995 }, { "questionId": "q123", "format": "json", "model": "gpt-4o-mini", "expected": "344498", "actual": "188,000", "correct": false, "inputTokens": 3710, "outputTokens": 4, "latencyMs": 1405 }, { "questionId": "q123", "format": "json", "model": "claude-haiku-4-5", "expected": "344498", "actual": "188,945", "correct": false, "inputTokens": 4077, "outputTokens": 7, "latencyMs": 1110 }, { "questionId": "q123", "format": "toon", "model": "gpt-4o-mini", "expected": "344498", "actual": "186,000", "correct": false, "inputTokens": 1561, "outputTokens": 4, "latencyMs": 1306 }, { "questionId": "q123", "format": "toon", "model": "claude-haiku-4-5", "expected": "344498", "actual": "337,045", "correct": false, "inputTokens": 1506, "outputTokens": 7, "latencyMs": 1292 }, { "questionId": "q123", "format": "csv", "model": "gpt-4o-mini", "expected": "344498", "actual": "188,000", "correct": false, "inputTokens": 1439, "outputTokens": 4, "latencyMs": 2659 }, { "questionId": "q123", "format": "csv", "model": "claude-haiku-4-5", "expected": "344498", "actual": "372,915", "correct": false, "inputTokens": 1442, "outputTokens": 7, "latencyMs": 966 }, { "questionId": "q123", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "344498", "actual": "174,000", "correct": false, "inputTokens": 3827, "outputTokens": 4, "latencyMs": 1177 }, { "questionId": "q123", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "344498", "actual": "188,647", "correct": false, "inputTokens": 3412, "outputTokens": 7, "latencyMs": 1018 }, { "questionId": "q123", "format": "yaml", "model": "gpt-4o-mini", "expected": "344498", "actual": "188,000", "correct": false, "inputTokens": 2983, "outputTokens": 4, "latencyMs": 1659 }, { "questionId": "q123", "format": "yaml", "model": "claude-haiku-4-5", "expected": "344498", "actual": "181,854", "correct": false, "inputTokens": 3107, "outputTokens": 7, "latencyMs": 1894 }, { "questionId": "q124", "format": "json", "model": "gpt-4o-mini", "expected": "312818.50", "actual": "188,174.36", "correct": false, "inputTokens": 3708, "outputTokens": 6, "latencyMs": 2900 }, { "questionId": "q124", "format": "json", "model": "claude-haiku-4-5", "expected": "312818.50", "actual": "287,745.89", "correct": false, "inputTokens": 4075, "outputTokens": 9, "latencyMs": 1196 }, { "questionId": "q124", "format": "toon", "model": "gpt-4o-mini", "expected": "312818.50", "actual": "Total revenue across all dates is 139,155.36.", "correct": false, "inputTokens": 1559, "outputTokens": 14, "latencyMs": 1401 }, { "questionId": "q124", "format": "toon", "model": "claude-haiku-4-5", "expected": "312818.50", "actual": "487,891.45", "correct": false, "inputTokens": 1504, "outputTokens": 9, "latencyMs": 1118 }, { "questionId": "q124", "format": "csv", "model": "gpt-4o-mini", "expected": "312818.50", "actual": "Total revenue across all dates is 155,000.00.", "correct": false, "inputTokens": 1437, "outputTokens": 14, "latencyMs": 1308 }, { "questionId": "q124", "format": "csv", "model": "claude-haiku-4-5", "expected": "312818.50", "actual": "487,891.89", "correct": false, "inputTokens": 1440, "outputTokens": 9, "latencyMs": 1120 }, { "questionId": "q124", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "312818.50", "actual": "Total revenue across all dates is 155,155.36.", "correct": false, "inputTokens": 3825, "outputTokens": 14, "latencyMs": 1143 }, { "questionId": "q124", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "312818.50", "actual": "381,968.89", "correct": false, "inputTokens": 3410, "outputTokens": 9, "latencyMs": 1172 }, { "questionId": "q124", "format": "yaml", "model": "gpt-4o-mini", "expected": "312818.50", "actual": "Total revenue across all dates is 155,155.36.", "correct": false, "inputTokens": 2981, "outputTokens": 14, "latencyMs": 1179 }, { "questionId": "q124", "format": "yaml", "model": "claude-haiku-4-5", "expected": "312818.50", "actual": "381,847.89", "correct": false, "inputTokens": 3105, "outputTokens": 9, "latencyMs": 1073 }, { "questionId": "q125", "format": "json", "model": "gpt-4o-mini", "expected": "1811", "actual": "1030", "correct": false, "inputTokens": 3710, "outputTokens": 3, "latencyMs": 3823 }, { "questionId": "q125", "format": "json", "model": "claude-haiku-4-5", "expected": "1811", "actual": "1,234", "correct": false, "inputTokens": 4078, "outputTokens": 7, "latencyMs": 1153 }, { "questionId": "q125", "format": "toon", "model": "gpt-4o-mini", "expected": "1811", "actual": "1040", "correct": false, "inputTokens": 1561, "outputTokens": 3, "latencyMs": 1472 }, { "questionId": "q125", "format": "toon", "model": "claude-haiku-4-5", "expected": "1811", "actual": "1,945", "correct": false, "inputTokens": 1507, "outputTokens": 7, "latencyMs": 940 }, { "questionId": "q125", "format": "csv", "model": "gpt-4o-mini", "expected": "1811", "actual": "1030", "correct": false, "inputTokens": 1439, "outputTokens": 3, "latencyMs": 1067 }, { "questionId": "q125", "format": "csv", "model": "claude-haiku-4-5", "expected": "1811", "actual": "1,945", "correct": false, "inputTokens": 1443, "outputTokens": 7, "latencyMs": 1183 }, { "questionId": "q125", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "1811", "actual": "Total conversions: 1030", "correct": false, "inputTokens": 3827, "outputTokens": 7, "latencyMs": 1103 }, { "questionId": "q125", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "1811", "actual": "1,454", "correct": false, "inputTokens": 3413, "outputTokens": 7, "latencyMs": 1067 }, { "questionId": "q125", "format": "yaml", "model": "gpt-4o-mini", "expected": "1811", "actual": "1040", "correct": false, "inputTokens": 2983, "outputTokens": 3, "latencyMs": 932 }, { "questionId": "q125", "format": "yaml", "model": "claude-haiku-4-5", "expected": "1811", "actual": "1,454", "correct": false, "inputTokens": 3108, "outputTokens": 7, "latencyMs": 1530 }, { "questionId": "q126", "format": "json", "model": "gpt-4o-mini", "expected": "42", "actual": "42", "correct": true, "inputTokens": 3710, "outputTokens": 2, "latencyMs": 1016 }, { "questionId": "q126", "format": "json", "model": "claude-haiku-4-5", "expected": "42", "actual": "42", "correct": true, "inputTokens": 4078, "outputTokens": 5, "latencyMs": 1440 }, { "questionId": "q126", "format": "toon", "model": "gpt-4o-mini", "expected": "42", "actual": "24", "correct": false, "inputTokens": 1561, "outputTokens": 2, "latencyMs": 1206 }, { "questionId": "q126", "format": "toon", "model": "claude-haiku-4-5", "expected": "42", "actual": "42", "correct": true, "inputTokens": 1507, "outputTokens": 5, "latencyMs": 1452 }, { "questionId": "q126", "format": "csv", "model": "gpt-4o-mini", "expected": "42", "actual": "22", "correct": false, "inputTokens": 1439, "outputTokens": 2, "latencyMs": 1249 }, { "questionId": "q126", "format": "csv", "model": "claude-haiku-4-5", "expected": "42", "actual": "42", "correct": true, "inputTokens": 1443, "outputTokens": 5, "latencyMs": 1248 }, { "questionId": "q126", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "42", "actual": "20", "correct": false, "inputTokens": 3827, "outputTokens": 2, "latencyMs": 1420 }, { "questionId": "q126", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "42", "actual": "47", "correct": false, "inputTokens": 3413, "outputTokens": 5, "latencyMs": 900 }, { "questionId": "q126", "format": "yaml", "model": "gpt-4o-mini", "expected": "42", "actual": "42", "correct": true, "inputTokens": 2983, "outputTokens": 2, "latencyMs": 1309 }, { "questionId": "q126", "format": "yaml", "model": "claude-haiku-4-5", "expected": "42", "actual": "47", "correct": false, "inputTokens": 3108, "outputTokens": 5, "latencyMs": 1216 }, { "questionId": "q127", "format": "json", "model": "gpt-4o-mini", "expected": "28", "actual": "38", "correct": false, "inputTokens": 3710, "outputTokens": 2, "latencyMs": 3911 }, { "questionId": "q127", "format": "json", "model": "claude-haiku-4-5", "expected": "28", "actual": "24", "correct": false, "inputTokens": 4078, "outputTokens": 5, "latencyMs": 1056 }, { "questionId": "q127", "format": "toon", "model": "gpt-4o-mini", "expected": "28", "actual": "20", "correct": false, "inputTokens": 1561, "outputTokens": 2, "latencyMs": 839 }, { "questionId": "q127", "format": "toon", "model": "claude-haiku-4-5", "expected": "28", "actual": "26", "correct": false, "inputTokens": 1507, "outputTokens": 5, "latencyMs": 965 }, { "questionId": "q127", "format": "csv", "model": "gpt-4o-mini", "expected": "28", "actual": "20", "correct": false, "inputTokens": 1439, "outputTokens": 2, "latencyMs": 2163 }, { "questionId": "q127", "format": "csv", "model": "claude-haiku-4-5", "expected": "28", "actual": "23", "correct": false, "inputTokens": 1443, "outputTokens": 5, "latencyMs": 1006 }, { "questionId": "q127", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "28", "actual": "18", "correct": false, "inputTokens": 3827, "outputTokens": 2, "latencyMs": 2619 }, { "questionId": "q127", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "28", "actual": "24", "correct": false, "inputTokens": 3413, "outputTokens": 5, "latencyMs": 989 }, { "questionId": "q127", "format": "yaml", "model": "gpt-4o-mini", "expected": "28", "actual": "22", "correct": false, "inputTokens": 2983, "outputTokens": 2, "latencyMs": 1830 }, { "questionId": "q127", "format": "yaml", "model": "claude-haiku-4-5", "expected": "28", "actual": "23", "correct": false, "inputTokens": 3108, "outputTokens": 5, "latencyMs": 1001 }, { "questionId": "q128", "format": "json", "model": "gpt-4o-mini", "expected": "11", "actual": "15", "correct": false, "inputTokens": 3710, "outputTokens": 2, "latencyMs": 1217 }, { "questionId": "q128", "format": "json", "model": "claude-haiku-4-5", "expected": "11", "actual": "11", "correct": true, "inputTokens": 4078, "outputTokens": 5, "latencyMs": 3180 }, { "questionId": "q128", "format": "toon", "model": "gpt-4o-mini", "expected": "11", "actual": "15", "correct": false, "inputTokens": 1561, "outputTokens": 2, "latencyMs": 1076 }, { "questionId": "q128", "format": "toon", "model": "claude-haiku-4-5", "expected": "11", "actual": "12", "correct": false, "inputTokens": 1507, "outputTokens": 5, "latencyMs": 912 }, { "questionId": "q128", "format": "csv", "model": "gpt-4o-mini", "expected": "11", "actual": "15", "correct": false, "inputTokens": 1439, "outputTokens": 2, "latencyMs": 2900 }, { "questionId": "q128", "format": "csv", "model": "claude-haiku-4-5", "expected": "11", "actual": "11", "correct": true, "inputTokens": 1443, "outputTokens": 5, "latencyMs": 1389 }, { "questionId": "q128", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "11", "actual": "12", "correct": false, "inputTokens": 3827, "outputTokens": 2, "latencyMs": 1107 }, { "questionId": "q128", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "11", "actual": "11", "correct": true, "inputTokens": 3413, "outputTokens": 5, "latencyMs": 1150 }, { "questionId": "q128", "format": "yaml", "model": "gpt-4o-mini", "expected": "11", "actual": "18", "correct": false, "inputTokens": 2983, "outputTokens": 2, "latencyMs": 1047 }, { "questionId": "q128", "format": "yaml", "model": "claude-haiku-4-5", "expected": "11", "actual": "11", "correct": true, "inputTokens": 3108, "outputTokens": 5, "latencyMs": 1169 }, { "questionId": "q129", "format": "json", "model": "gpt-4o-mini", "expected": "58", "actual": "36", "correct": false, "inputTokens": 3709, "outputTokens": 2, "latencyMs": 1007 }, { "questionId": "q129", "format": "json", "model": "claude-haiku-4-5", "expected": "58", "actual": "50", "correct": false, "inputTokens": 4078, "outputTokens": 5, "latencyMs": 1342 }, { "questionId": "q129", "format": "toon", "model": "gpt-4o-mini", "expected": "58", "actual": "24", "correct": false, "inputTokens": 1560, "outputTokens": 2, "latencyMs": 828 }, { "questionId": "q129", "format": "toon", "model": "claude-haiku-4-5", "expected": "58", "actual": "47", "correct": false, "inputTokens": 1507, "outputTokens": 5, "latencyMs": 1305 }, { "questionId": "q129", "format": "csv", "model": "gpt-4o-mini", "expected": "58", "actual": "15", "correct": false, "inputTokens": 1438, "outputTokens": 2, "latencyMs": 1305 }, { "questionId": "q129", "format": "csv", "model": "claude-haiku-4-5", "expected": "58", "actual": "54", "correct": false, "inputTokens": 1443, "outputTokens": 5, "latencyMs": 1406 }, { "questionId": "q129", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "58", "actual": "18", "correct": false, "inputTokens": 3826, "outputTokens": 2, "latencyMs": 1513 }, { "questionId": "q129", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "58", "actual": "47", "correct": false, "inputTokens": 3413, "outputTokens": 5, "latencyMs": 1026 }, { "questionId": "q129", "format": "yaml", "model": "gpt-4o-mini", "expected": "58", "actual": "42", "correct": false, "inputTokens": 2982, "outputTokens": 2, "latencyMs": 1373 }, { "questionId": "q129", "format": "yaml", "model": "claude-haiku-4-5", "expected": "58", "actual": "54", "correct": false, "inputTokens": 3108, "outputTokens": 5, "latencyMs": 1112 }, { "questionId": "q130", "format": "json", "model": "gpt-4o-mini", "expected": "41", "actual": "34", "correct": false, "inputTokens": 3709, "outputTokens": 2, "latencyMs": 1248 }, { "questionId": "q130", "format": "json", "model": "claude-haiku-4-5", "expected": "41", "actual": "31", "correct": false, "inputTokens": 4078, "outputTokens": 5, "latencyMs": 1083 }, { "questionId": "q130", "format": "toon", "model": "gpt-4o-mini", "expected": "41", "actual": "24", "correct": false, "inputTokens": 1560, "outputTokens": 2, "latencyMs": 895 }, { "questionId": "q130", "format": "toon", "model": "claude-haiku-4-5", "expected": "41", "actual": "38", "correct": false, "inputTokens": 1507, "outputTokens": 5, "latencyMs": 1087 }, { "questionId": "q130", "format": "csv", "model": "gpt-4o-mini", "expected": "41", "actual": "18", "correct": false, "inputTokens": 1438, "outputTokens": 2, "latencyMs": 1157 }, { "questionId": "q130", "format": "csv", "model": "claude-haiku-4-5", "expected": "41", "actual": "38", "correct": false, "inputTokens": 1443, "outputTokens": 5, "latencyMs": 1155 }, { "questionId": "q130", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "41", "actual": "18", "correct": false, "inputTokens": 3826, "outputTokens": 2, "latencyMs": 1959 }, { "questionId": "q130", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "41", "actual": "31", "correct": false, "inputTokens": 3413, "outputTokens": 5, "latencyMs": 1110 }, { "questionId": "q130", "format": "yaml", "model": "gpt-4o-mini", "expected": "41", "actual": "34", "correct": false, "inputTokens": 2982, "outputTokens": 2, "latencyMs": 4540 }, { "questionId": "q130", "format": "yaml", "model": "claude-haiku-4-5", "expected": "41", "actual": "31", "correct": false, "inputTokens": 3108, "outputTokens": 5, "latencyMs": 1286 }, { "questionId": "q131", "format": "json", "model": "gpt-4o-mini", "expected": "23", "actual": "18", "correct": false, "inputTokens": 3709, "outputTokens": 2, "latencyMs": 1059 }, { "questionId": "q131", "format": "json", "model": "claude-haiku-4-5", "expected": "23", "actual": "20", "correct": false, "inputTokens": 4078, "outputTokens": 5, "latencyMs": 1302 }, { "questionId": "q131", "format": "toon", "model": "gpt-4o-mini", "expected": "23", "actual": "18", "correct": false, "inputTokens": 1560, "outputTokens": 2, "latencyMs": 1019 }, { "questionId": "q131", "format": "toon", "model": "claude-haiku-4-5", "expected": "23", "actual": "20", "correct": false, "inputTokens": 1507, "outputTokens": 5, "latencyMs": 975 }, { "questionId": "q131", "format": "csv", "model": "gpt-4o-mini", "expected": "23", "actual": "18", "correct": false, "inputTokens": 1438, "outputTokens": 2, "latencyMs": 1056 }, { "questionId": "q131", "format": "csv", "model": "claude-haiku-4-5", "expected": "23", "actual": "20", "correct": false, "inputTokens": 1443, "outputTokens": 5, "latencyMs": 984 }, { "questionId": "q131", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "23", "actual": "15", "correct": false, "inputTokens": 3826, "outputTokens": 2, "latencyMs": 1420 }, { "questionId": "q131", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "23", "actual": "21", "correct": false, "inputTokens": 3413, "outputTokens": 5, "latencyMs": 1139 }, { "questionId": "q131", "format": "yaml", "model": "gpt-4o-mini", "expected": "23", "actual": "18", "correct": false, "inputTokens": 2982, "outputTokens": 2, "latencyMs": 1097 }, { "questionId": "q131", "format": "yaml", "model": "claude-haiku-4-5", "expected": "23", "actual": "21", "correct": false, "inputTokens": 3108, "outputTokens": 5, "latencyMs": 1203 }, { "questionId": "q132", "format": "json", "model": "gpt-4o-mini", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 15188, "outputTokens": 3, "latencyMs": 2257 }, { "questionId": "q132", "format": "json", "model": "claude-haiku-4-5", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 17409, "outputTokens": 6, "latencyMs": 1292 }, { "questionId": "q132", "format": "toon", "model": "gpt-4o-mini", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 8789, "outputTokens": 3, "latencyMs": 1877 }, { "questionId": "q132", "format": "toon", "model": "claude-haiku-4-5", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 9279, "outputTokens": 6, "latencyMs": 1118 }, { "questionId": "q132", "format": "csv", "model": "gpt-4o-mini", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 8557, "outputTokens": 3, "latencyMs": 4023 }, { "questionId": "q132", "format": "csv", "model": "claude-haiku-4-5", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 9125, "outputTokens": 6, "latencyMs": 1134 }, { "questionId": "q132", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 15482, "outputTokens": 3, "latencyMs": 5304 }, { "questionId": "q132", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 15367, "outputTokens": 6, "latencyMs": 1442 }, { "questionId": "q132", "format": "yaml", "model": "gpt-4o-mini", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 13172, "outputTokens": 3, "latencyMs": 2157 }, { "questionId": "q132", "format": "yaml", "model": "claude-haiku-4-5", "expected": "430828", "actual": "430828", "correct": true, "inputTokens": 14483, "outputTokens": 6, "latencyMs": 1483 }, { "questionId": "q133", "format": "json", "model": "gpt-4o-mini", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 15190, "outputTokens": 3, "latencyMs": 2084 }, { "questionId": "q133", "format": "json", "model": "claude-haiku-4-5", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 17410, "outputTokens": 6, "latencyMs": 2592 }, { "questionId": "q133", "format": "toon", "model": "gpt-4o-mini", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 8791, "outputTokens": 3, "latencyMs": 1208 }, { "questionId": "q133", "format": "toon", "model": "claude-haiku-4-5", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 9280, "outputTokens": 6, "latencyMs": 1261 }, { "questionId": "q133", "format": "csv", "model": "gpt-4o-mini", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 8559, "outputTokens": 3, "latencyMs": 1697 }, { "questionId": "q133", "format": "csv", "model": "claude-haiku-4-5", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 9126, "outputTokens": 6, "latencyMs": 1171 }, { "questionId": "q133", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 15484, "outputTokens": 3, "latencyMs": 1704 }, { "questionId": "q133", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 15368, "outputTokens": 6, "latencyMs": 1637 }, { "questionId": "q133", "format": "yaml", "model": "gpt-4o-mini", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 13174, "outputTokens": 3, "latencyMs": 1599 }, { "questionId": "q133", "format": "yaml", "model": "claude-haiku-4-5", "expected": "11798", "actual": "11798", "correct": true, "inputTokens": 14484, "outputTokens": 6, "latencyMs": 1505 }, { "questionId": "q134", "format": "json", "model": "gpt-4o-mini", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 15193, "outputTokens": 3, "latencyMs": 2340 }, { "questionId": "q134", "format": "json", "model": "claude-haiku-4-5", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 17412, "outputTokens": 6, "latencyMs": 1380 }, { "questionId": "q134", "format": "toon", "model": "gpt-4o-mini", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 8794, "outputTokens": 3, "latencyMs": 1631 }, { "questionId": "q134", "format": "toon", "model": "claude-haiku-4-5", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 9282, "outputTokens": 6, "latencyMs": 1271 }, { "questionId": "q134", "format": "csv", "model": "gpt-4o-mini", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 8562, "outputTokens": 3, "latencyMs": 1620 }, { "questionId": "q134", "format": "csv", "model": "claude-haiku-4-5", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 9128, "outputTokens": 6, "latencyMs": 1279 }, { "questionId": "q134", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 15487, "outputTokens": 3, "latencyMs": 14565 }, { "questionId": "q134", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 15370, "outputTokens": 6, "latencyMs": 1559 }, { "questionId": "q134", "format": "yaml", "model": "gpt-4o-mini", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 13177, "outputTokens": 3, "latencyMs": 1600 }, { "questionId": "q134", "format": "yaml", "model": "claude-haiku-4-5", "expected": "183631", "actual": "183631", "correct": true, "inputTokens": 14486, "outputTokens": 6, "latencyMs": 1179 }, { "questionId": "q135", "format": "json", "model": "gpt-4o-mini", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 15192, "outputTokens": 3, "latencyMs": 2508 }, { "questionId": "q135", "format": "json", "model": "claude-haiku-4-5", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 17412, "outputTokens": 6, "latencyMs": 1359 }, { "questionId": "q135", "format": "toon", "model": "gpt-4o-mini", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 8793, "outputTokens": 3, "latencyMs": 1188 }, { "questionId": "q135", "format": "toon", "model": "claude-haiku-4-5", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 9282, "outputTokens": 6, "latencyMs": 1204 }, { "questionId": "q135", "format": "csv", "model": "gpt-4o-mini", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 8561, "outputTokens": 3, "latencyMs": 2448 }, { "questionId": "q135", "format": "csv", "model": "claude-haiku-4-5", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 9128, "outputTokens": 6, "latencyMs": 1311 }, { "questionId": "q135", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 15486, "outputTokens": 3, "latencyMs": 2442 }, { "questionId": "q135", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 15370, "outputTokens": 6, "latencyMs": 1414 }, { "questionId": "q135", "format": "yaml", "model": "gpt-4o-mini", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 13176, "outputTokens": 3, "latencyMs": 2254 }, { "questionId": "q135", "format": "yaml", "model": "claude-haiku-4-5", "expected": "29246", "actual": "29246", "correct": true, "inputTokens": 14486, "outputTokens": 6, "latencyMs": 1512 }, { "questionId": "q136", "format": "json", "model": "gpt-4o-mini", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 15188, "outputTokens": 3, "latencyMs": 1565 }, { "questionId": "q136", "format": "json", "model": "claude-haiku-4-5", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 17407, "outputTokens": 6, "latencyMs": 1871 }, { "questionId": "q136", "format": "toon", "model": "gpt-4o-mini", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 8789, "outputTokens": 3, "latencyMs": 1963 }, { "questionId": "q136", "format": "toon", "model": "claude-haiku-4-5", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 9277, "outputTokens": 6, "latencyMs": 1533 }, { "questionId": "q136", "format": "csv", "model": "gpt-4o-mini", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 8557, "outputTokens": 3, "latencyMs": 1561 }, { "questionId": "q136", "format": "csv", "model": "claude-haiku-4-5", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 9123, "outputTokens": 6, "latencyMs": 1200 }, { "questionId": "q136", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 15482, "outputTokens": 3, "latencyMs": 1657 }, { "questionId": "q136", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 15365, "outputTokens": 6, "latencyMs": 1582 }, { "questionId": "q136", "format": "yaml", "model": "gpt-4o-mini", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 13172, "outputTokens": 3, "latencyMs": 3402 }, { "questionId": "q136", "format": "yaml", "model": "claude-haiku-4-5", "expected": "135306", "actual": "135306", "correct": true, "inputTokens": 14481, "outputTokens": 6, "latencyMs": 1251 }, { "questionId": "q137", "format": "json", "model": "gpt-4o-mini", "expected": "24914", "actual": "24914", "correct": true, "inputTokens": 15187, "outputTokens": 3, "latencyMs": 2019 }, { "questionId": "q137", "format": "json", "model": "claude-haiku-4-5", "expected": "24914", "actual": "24914", "correct": true, "inputTokens": 17408, "outputTokens": 6, "latencyMs": 1517 }, { "questionId": "q137", "format": "toon", "model": "gpt-4o-mini", "expected": "24914", "actual": "The repository undefined/react-native does not exist in the provided data.", "correct": false, "inputTokens": 8788, "outputTokens": 14, "latencyMs": 1737 }, { "questionId": "q137", "format": "toon", "model": "claude-haiku-4-5", "expected": "24914", "actual": "24914", "correct": true, "inputTokens": 9278, "outputTokens": 6, "latencyMs": 1467 }, { "questionId": "q137", "format": "csv", "model": "gpt-4o-mini", "expected": "24914", "actual": "24914", "correct": true, "inputTokens": 8556, "outputTokens": 3, "latencyMs": 3442 }, { "questionId": "q137", "format": "csv", "model": "claude-haiku-4-5", "expected": "24914", "actual": "24914", "correct": true, "inputTokens": 9124, "outputTokens": 6, "latencyMs": 1300 }, { "questionId": "q137", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "24914", "actual": "24914", "correct": true, "inputTokens": 15481, "outputTokens": 3, "latencyMs": 1825 }, { "questionId": "q137", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "24914", "actual": "24914", "correct": true, "inputTokens": 15366, "outputTokens": 6, "latencyMs": 1443 }, { "questionId": "q137", "format": "yaml", "model": "gpt-4o-mini", "expected": "24914", "actual": "124320", "correct": false, "inputTokens": 13171, "outputTokens": 3, "latencyMs": 1783 }, { "questionId": "q137", "format": "yaml", "model": "claude-haiku-4-5", "expected": "24914", "actual": "24914", "correct": true, "inputTokens": 14482, "outputTokens": 6, "latencyMs": 1362 }, { "questionId": "q138", "format": "json", "model": "gpt-4o-mini", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 15187, "outputTokens": 3, "latencyMs": 1824 }, { "questionId": "q138", "format": "json", "model": "claude-haiku-4-5", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 17407, "outputTokens": 6, "latencyMs": 1479 }, { "questionId": "q138", "format": "toon", "model": "gpt-4o-mini", "expected": "111683", "actual": "108017", "correct": false, "inputTokens": 8788, "outputTokens": 3, "latencyMs": 3315 }, { "questionId": "q138", "format": "toon", "model": "claude-haiku-4-5", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 9277, "outputTokens": 6, "latencyMs": 1270 }, { "questionId": "q138", "format": "csv", "model": "gpt-4o-mini", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 8556, "outputTokens": 3, "latencyMs": 1384 }, { "questionId": "q138", "format": "csv", "model": "claude-haiku-4-5", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 9123, "outputTokens": 6, "latencyMs": 1252 }, { "questionId": "q138", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 15481, "outputTokens": 3, "latencyMs": 3048 }, { "questionId": "q138", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 15365, "outputTokens": 6, "latencyMs": 1381 }, { "questionId": "q138", "format": "yaml", "model": "gpt-4o-mini", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 13171, "outputTokens": 3, "latencyMs": 3804 }, { "questionId": "q138", "format": "yaml", "model": "claude-haiku-4-5", "expected": "111683", "actual": "111683", "correct": true, "inputTokens": 14481, "outputTokens": 6, "latencyMs": 1498 }, { "questionId": "q139", "format": "json", "model": "gpt-4o-mini", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 15194, "outputTokens": 3, "latencyMs": 1726 }, { "questionId": "q139", "format": "json", "model": "claude-haiku-4-5", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 17412, "outputTokens": 6, "latencyMs": 1526 }, { "questionId": "q139", "format": "toon", "model": "gpt-4o-mini", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 8795, "outputTokens": 3, "latencyMs": 1685 }, { "questionId": "q139", "format": "toon", "model": "claude-haiku-4-5", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 9282, "outputTokens": 6, "latencyMs": 1140 }, { "questionId": "q139", "format": "csv", "model": "gpt-4o-mini", "expected": "13364", "actual": "0", "correct": false, "inputTokens": 8563, "outputTokens": 2, "latencyMs": 1933 }, { "questionId": "q139", "format": "csv", "model": "claude-haiku-4-5", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 9128, "outputTokens": 6, "latencyMs": 1157 }, { "questionId": "q139", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 15488, "outputTokens": 3, "latencyMs": 1249 }, { "questionId": "q139", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 15370, "outputTokens": 6, "latencyMs": 1347 }, { "questionId": "q139", "format": "yaml", "model": "gpt-4o-mini", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 13178, "outputTokens": 3, "latencyMs": 2174 }, { "questionId": "q139", "format": "yaml", "model": "claude-haiku-4-5", "expected": "13364", "actual": "13364", "correct": true, "inputTokens": 14486, "outputTokens": 6, "latencyMs": 1197 }, { "questionId": "q140", "format": "json", "model": "gpt-4o-mini", "expected": "98464", "actual": "0", "correct": false, "inputTokens": 15186, "outputTokens": 2, "latencyMs": 3252 }, { "questionId": "q140", "format": "json", "model": "claude-haiku-4-5", "expected": "98464", "actual": "98464", "correct": true, "inputTokens": 17405, "outputTokens": 6, "latencyMs": 1667 }, { "questionId": "q140", "format": "toon", "model": "gpt-4o-mini", "expected": "98464", "actual": "0", "correct": false, "inputTokens": 8787, "outputTokens": 2, "latencyMs": 1192 }, { "questionId": "q140", "format": "toon", "model": "claude-haiku-4-5", "expected": "98464", "actual": "98464", "correct": true, "inputTokens": 9275, "outputTokens": 6, "latencyMs": 1113 }, { "questionId": "q140", "format": "csv", "model": "gpt-4o-mini", "expected": "98464", "actual": "0", "correct": false, "inputTokens": 8555, "outputTokens": 2, "latencyMs": 2198 }, { "questionId": "q140", "format": "csv", "model": "claude-haiku-4-5", "expected": "98464", "actual": "98464", "correct": true, "inputTokens": 9121, "outputTokens": 6, "latencyMs": 1187 }, { "questionId": "q140", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "98464", "actual": "0", "correct": false, "inputTokens": 15480, "outputTokens": 2, "latencyMs": 8573 }, { "questionId": "q140", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "98464", "actual": "98464", "correct": true, "inputTokens": 15363, "outputTokens": 6, "latencyMs": 1311 }, { "questionId": "q140", "format": "yaml", "model": "gpt-4o-mini", "expected": "98464", "actual": "0", "correct": false, "inputTokens": 13170, "outputTokens": 2, "latencyMs": 3471 }, { "questionId": "q140", "format": "yaml", "model": "claude-haiku-4-5", "expected": "98464", "actual": "98464", "correct": true, "inputTokens": 14479, "outputTokens": 6, "latencyMs": 1457 }, { "questionId": "q141", "format": "json", "model": "gpt-4o-mini", "expected": "6378", "actual": "6378", "correct": true, "inputTokens": 15188, "outputTokens": 3, "latencyMs": 1363 }, { "questionId": "q141", "format": "json", "model": "claude-haiku-4-5", "expected": "6378", "actual": "6378", "correct": true, "inputTokens": 17408, "outputTokens": 6, "latencyMs": 1803 }, { "questionId": "q141", "format": "toon", "model": "gpt-4o-mini", "expected": "6378", "actual": "6378", "correct": true, "inputTokens": 8789, "outputTokens": 3, "latencyMs": 3696 }, { "questionId": "q141", "format": "toon", "model": "claude-haiku-4-5", "expected": "6378", "actual": "6378", "correct": true, "inputTokens": 9278, "outputTokens": 6, "latencyMs": 1391 }, { "questionId": "q141", "format": "csv", "model": "gpt-4o-mini", "expected": "6378", "actual": "93731", "correct": false, "inputTokens": 8557, "outputTokens": 3, "latencyMs": 7861 }, { "questionId": "q141", "format": "csv", "model": "claude-haiku-4-5", "expected": "6378", "actual": "6378", "correct": true, "inputTokens": 9124, "outputTokens": 6, "latencyMs": 1420 }, { "questionId": "q141", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "6378", "actual": "6378", "correct": true, "inputTokens": 15482, "outputTokens": 3, "latencyMs": 1769 }, { "questionId": "q141", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "6378", "actual": "6378", "correct": true, "inputTokens": 15366, "outputTokens": 6, "latencyMs": 1233 }, { "questionId": "q141", "format": "yaml", "model": "gpt-4o-mini", "expected": "6378", "actual": "93731", "correct": false, "inputTokens": 13172, "outputTokens": 3, "latencyMs": 1831 }, { "questionId": "q141", "format": "yaml", "model": "claude-haiku-4-5", "expected": "6378", "actual": "6378", "correct": true, "inputTokens": 14482, "outputTokens": 6, "latencyMs": 1507 }, { "questionId": "q142", "format": "json", "model": "gpt-4o-mini", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 15190, "outputTokens": 3, "latencyMs": 10752 }, { "questionId": "q142", "format": "json", "model": "claude-haiku-4-5", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 17409, "outputTokens": 6, "latencyMs": 1672 }, { "questionId": "q142", "format": "toon", "model": "gpt-4o-mini", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 8791, "outputTokens": 3, "latencyMs": 1788 }, { "questionId": "q142", "format": "toon", "model": "claude-haiku-4-5", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 9279, "outputTokens": 6, "latencyMs": 1633 }, { "questionId": "q142", "format": "csv", "model": "gpt-4o-mini", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 8559, "outputTokens": 3, "latencyMs": 1365 }, { "questionId": "q142", "format": "csv", "model": "claude-haiku-4-5", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 9125, "outputTokens": 6, "latencyMs": 1242 }, { "questionId": "q142", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 15484, "outputTokens": 3, "latencyMs": 2237 }, { "questionId": "q142", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 15367, "outputTokens": 6, "latencyMs": 1275 }, { "questionId": "q142", "format": "yaml", "model": "gpt-4o-mini", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 13174, "outputTokens": 3, "latencyMs": 3028 }, { "questionId": "q142", "format": "yaml", "model": "claude-haiku-4-5", "expected": "254916", "actual": "254916", "correct": true, "inputTokens": 14483, "outputTokens": 6, "latencyMs": 1615 }, { "questionId": "q143", "format": "json", "model": "gpt-4o-mini", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 15188, "outputTokens": 3, "latencyMs": 1972 }, { "questionId": "q143", "format": "json", "model": "claude-haiku-4-5", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 17410, "outputTokens": 6, "latencyMs": 2308 }, { "questionId": "q143", "format": "toon", "model": "gpt-4o-mini", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 8789, "outputTokens": 3, "latencyMs": 1361 }, { "questionId": "q143", "format": "toon", "model": "claude-haiku-4-5", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 9280, "outputTokens": 6, "latencyMs": 1162 }, { "questionId": "q143", "format": "csv", "model": "gpt-4o-mini", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 8557, "outputTokens": 3, "latencyMs": 2196 }, { "questionId": "q143", "format": "csv", "model": "claude-haiku-4-5", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 9126, "outputTokens": 6, "latencyMs": 1199 }, { "questionId": "q143", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 15482, "outputTokens": 3, "latencyMs": 1758 }, { "questionId": "q143", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 15368, "outputTokens": 6, "latencyMs": 1340 }, { "questionId": "q143", "format": "yaml", "model": "gpt-4o-mini", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 13172, "outputTokens": 3, "latencyMs": 2122 }, { "questionId": "q143", "format": "yaml", "model": "claude-haiku-4-5", "expected": "32413", "actual": "32413", "correct": true, "inputTokens": 14484, "outputTokens": 6, "latencyMs": 1156 }, { "questionId": "q144", "format": "json", "model": "gpt-4o-mini", "expected": "240059", "actual": "0", "correct": false, "inputTokens": 15186, "outputTokens": 2, "latencyMs": 1208 }, { "questionId": "q144", "format": "json", "model": "claude-haiku-4-5", "expected": "240059", "actual": "240059", "correct": true, "inputTokens": 17405, "outputTokens": 6, "latencyMs": 1826 }, { "questionId": "q144", "format": "toon", "model": "gpt-4o-mini", "expected": "240059", "actual": "undefined", "correct": false, "inputTokens": 8787, "outputTokens": 2, "latencyMs": 2224 }, { "questionId": "q144", "format": "toon", "model": "claude-haiku-4-5", "expected": "240059", "actual": "240059", "correct": true, "inputTokens": 9275, "outputTokens": 6, "latencyMs": 1220 }, { "questionId": "q144", "format": "csv", "model": "gpt-4o-mini", "expected": "240059", "actual": "undefined", "correct": false, "inputTokens": 8555, "outputTokens": 2, "latencyMs": 1199 }, { "questionId": "q144", "format": "csv", "model": "claude-haiku-4-5", "expected": "240059", "actual": "240059", "correct": true, "inputTokens": 9121, "outputTokens": 6, "latencyMs": 1264 }, { "questionId": "q144", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "240059", "actual": "undefined/react does not exist in the provided data.", "correct": false, "inputTokens": 15480, "outputTokens": 11, "latencyMs": 3072 }, { "questionId": "q144", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "240059", "actual": "240059", "correct": true, "inputTokens": 15363, "outputTokens": 6, "latencyMs": 1609 }, { "questionId": "q144", "format": "yaml", "model": "gpt-4o-mini", "expected": "240059", "actual": "undefined/react does not exist in the provided data.", "correct": false, "inputTokens": 13170, "outputTokens": 11, "latencyMs": 2608 }, { "questionId": "q144", "format": "yaml", "model": "claude-haiku-4-5", "expected": "240059", "actual": "240059", "correct": true, "inputTokens": 14479, "outputTokens": 6, "latencyMs": 1237 }, { "questionId": "q145", "format": "json", "model": "gpt-4o-mini", "expected": "48986", "actual": "0", "correct": false, "inputTokens": 15187, "outputTokens": 2, "latencyMs": 1906 }, { "questionId": "q145", "format": "json", "model": "claude-haiku-4-5", "expected": "48986", "actual": "48986", "correct": true, "inputTokens": 17406, "outputTokens": 6, "latencyMs": 1399 }, { "questionId": "q145", "format": "toon", "model": "gpt-4o-mini", "expected": "48986", "actual": "0", "correct": false, "inputTokens": 8788, "outputTokens": 2, "latencyMs": 2026 }, { "questionId": "q145", "format": "toon", "model": "claude-haiku-4-5", "expected": "48986", "actual": "48986", "correct": true, "inputTokens": 9276, "outputTokens": 6, "latencyMs": 1318 }, { "questionId": "q145", "format": "csv", "model": "gpt-4o-mini", "expected": "48986", "actual": "0", "correct": false, "inputTokens": 8556, "outputTokens": 2, "latencyMs": 1605 }, { "questionId": "q145", "format": "csv", "model": "claude-haiku-4-5", "expected": "48986", "actual": "48986", "correct": true, "inputTokens": 9122, "outputTokens": 6, "latencyMs": 1270 }, { "questionId": "q145", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "48986", "actual": "0", "correct": false, "inputTokens": 15481, "outputTokens": 2, "latencyMs": 5367 }, { "questionId": "q145", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "48986", "actual": "48986", "correct": true, "inputTokens": 15364, "outputTokens": 6, "latencyMs": 1204 }, { "questionId": "q145", "format": "yaml", "model": "gpt-4o-mini", "expected": "48986", "actual": "The repository \"undefined/Python\" does not exist in the provided data.", "correct": false, "inputTokens": 13171, "outputTokens": 16, "latencyMs": 6329 }, { "questionId": "q145", "format": "yaml", "model": "claude-haiku-4-5", "expected": "48986", "actual": "48986", "correct": true, "inputTokens": 14480, "outputTokens": 6, "latencyMs": 1369 }, { "questionId": "q146", "format": "json", "model": "gpt-4o-mini", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 15186, "outputTokens": 3, "latencyMs": 2063 }, { "questionId": "q146", "format": "json", "model": "claude-haiku-4-5", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 17405, "outputTokens": 6, "latencyMs": 1470 }, { "questionId": "q146", "format": "toon", "model": "gpt-4o-mini", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 8787, "outputTokens": 3, "latencyMs": 1386 }, { "questionId": "q146", "format": "toon", "model": "claude-haiku-4-5", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 9275, "outputTokens": 6, "latencyMs": 1104 }, { "questionId": "q146", "format": "csv", "model": "gpt-4o-mini", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 8555, "outputTokens": 3, "latencyMs": 1747 }, { "questionId": "q146", "format": "csv", "model": "claude-haiku-4-5", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 9121, "outputTokens": 6, "latencyMs": 1300 }, { "questionId": "q146", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 15480, "outputTokens": 3, "latencyMs": 1443 }, { "questionId": "q146", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 15363, "outputTokens": 6, "latencyMs": 1282 }, { "questionId": "q146", "format": "yaml", "model": "gpt-4o-mini", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 13170, "outputTokens": 3, "latencyMs": 2185 }, { "questionId": "q146", "format": "yaml", "model": "claude-haiku-4-5", "expected": "209624", "actual": "209624", "correct": true, "inputTokens": 14479, "outputTokens": 6, "latencyMs": 1407 }, { "questionId": "q147", "format": "json", "model": "gpt-4o-mini", "expected": "58023", "actual": "58023", "correct": true, "inputTokens": 15186, "outputTokens": 3, "latencyMs": 1743 }, { "questionId": "q147", "format": "json", "model": "claude-haiku-4-5", "expected": "58023", "actual": "58023", "correct": true, "inputTokens": 17406, "outputTokens": 6, "latencyMs": 1564 }, { "questionId": "q147", "format": "toon", "model": "gpt-4o-mini", "expected": "58023", "actual": "58023", "correct": true, "inputTokens": 8787, "outputTokens": 3, "latencyMs": 1317 }, { "questionId": "q147", "format": "toon", "model": "claude-haiku-4-5", "expected": "58023", "actual": "58023", "correct": true, "inputTokens": 9276, "outputTokens": 6, "latencyMs": 1258 }, { "questionId": "q147", "format": "csv", "model": "gpt-4o-mini", "expected": "58023", "actual": "58023", "correct": true, "inputTokens": 8555, "outputTokens": 3, "latencyMs": 2419 }, { "questionId": "q147", "format": "csv", "model": "claude-haiku-4-5", "expected": "58023", "actual": "58023", "correct": true, "inputTokens": 9122, "outputTokens": 6, "latencyMs": 1171 }, { "questionId": "q147", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "58023", "actual": "undefined/linux does not exist in the provided data.", "correct": false, "inputTokens": 15480, "outputTokens": 11, "latencyMs": 1680 }, { "questionId": "q147", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "58023", "actual": "58023", "correct": true, "inputTokens": 15364, "outputTokens": 6, "latencyMs": 1396 }, { "questionId": "q147", "format": "yaml", "model": "gpt-4o-mini", "expected": "58023", "actual": "The repository \"undefined/linux\" does not exist in the provided data.", "correct": false, "inputTokens": 13170, "outputTokens": 15, "latencyMs": 1418 }, { "questionId": "q147", "format": "yaml", "model": "claude-haiku-4-5", "expected": "58023", "actual": "58023", "correct": true, "inputTokens": 14480, "outputTokens": 6, "latencyMs": 1399 }, { "questionId": "q148", "format": "json", "model": "gpt-4o-mini", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 15189, "outputTokens": 3, "latencyMs": 1673 }, { "questionId": "q148", "format": "json", "model": "claude-haiku-4-5", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 17407, "outputTokens": 6, "latencyMs": 1736 }, { "questionId": "q148", "format": "toon", "model": "gpt-4o-mini", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 8790, "outputTokens": 3, "latencyMs": 1754 }, { "questionId": "q148", "format": "toon", "model": "claude-haiku-4-5", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 9277, "outputTokens": 6, "latencyMs": 1317 }, { "questionId": "q148", "format": "csv", "model": "gpt-4o-mini", "expected": "196024", "actual": "0", "correct": false, "inputTokens": 8558, "outputTokens": 2, "latencyMs": 3219 }, { "questionId": "q148", "format": "csv", "model": "claude-haiku-4-5", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 9123, "outputTokens": 6, "latencyMs": 1311 }, { "questionId": "q148", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 15483, "outputTokens": 3, "latencyMs": 1346 }, { "questionId": "q148", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 15365, "outputTokens": 6, "latencyMs": 1560 }, { "questionId": "q148", "format": "yaml", "model": "gpt-4o-mini", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 13173, "outputTokens": 3, "latencyMs": 1009 }, { "questionId": "q148", "format": "yaml", "model": "claude-haiku-4-5", "expected": "196024", "actual": "196024", "correct": true, "inputTokens": 14481, "outputTokens": 6, "latencyMs": 1446 }, { "questionId": "q149", "format": "json", "model": "gpt-4o-mini", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 15189, "outputTokens": 3, "latencyMs": 3361 }, { "questionId": "q149", "format": "json", "model": "claude-haiku-4-5", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 17408, "outputTokens": 6, "latencyMs": 1788 }, { "questionId": "q149", "format": "toon", "model": "gpt-4o-mini", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 8790, "outputTokens": 3, "latencyMs": 1123 }, { "questionId": "q149", "format": "toon", "model": "claude-haiku-4-5", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 9278, "outputTokens": 6, "latencyMs": 1235 }, { "questionId": "q149", "format": "csv", "model": "gpt-4o-mini", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 8558, "outputTokens": 3, "latencyMs": 1100 }, { "questionId": "q149", "format": "csv", "model": "claude-haiku-4-5", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 9124, "outputTokens": 6, "latencyMs": 1188 }, { "questionId": "q149", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 15483, "outputTokens": 3, "latencyMs": 1557 }, { "questionId": "q149", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 15366, "outputTokens": 6, "latencyMs": 1352 }, { "questionId": "q149", "format": "yaml", "model": "gpt-4o-mini", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 13173, "outputTokens": 3, "latencyMs": 1280 }, { "questionId": "q149", "format": "yaml", "model": "claude-haiku-4-5", "expected": "30919", "actual": "30919", "correct": true, "inputTokens": 14482, "outputTokens": 6, "latencyMs": 1247 }, { "questionId": "q150", "format": "json", "model": "gpt-4o-mini", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 15188, "outputTokens": 3, "latencyMs": 1394 }, { "questionId": "q150", "format": "json", "model": "claude-haiku-4-5", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 17405, "outputTokens": 6, "latencyMs": 1801 }, { "questionId": "q150", "format": "toon", "model": "gpt-4o-mini", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 8789, "outputTokens": 3, "latencyMs": 2052 }, { "questionId": "q150", "format": "toon", "model": "claude-haiku-4-5", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 9275, "outputTokens": 6, "latencyMs": 1176 }, { "questionId": "q150", "format": "csv", "model": "gpt-4o-mini", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 8557, "outputTokens": 3, "latencyMs": 2084 }, { "questionId": "q150", "format": "csv", "model": "claude-haiku-4-5", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 9121, "outputTokens": 6, "latencyMs": 1191 }, { "questionId": "q150", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 15482, "outputTokens": 3, "latencyMs": 1261 }, { "questionId": "q150", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 15363, "outputTokens": 6, "latencyMs": 1355 }, { "questionId": "q150", "format": "yaml", "model": "gpt-4o-mini", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 13172, "outputTokens": 3, "latencyMs": 3388 }, { "questionId": "q150", "format": "yaml", "model": "claude-haiku-4-5", "expected": "192220", "actual": "192220", "correct": true, "inputTokens": 14479, "outputTokens": 6, "latencyMs": 1591 }, { "questionId": "q151", "format": "json", "model": "gpt-4o-mini", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 15191, "outputTokens": 3, "latencyMs": 1942 }, { "questionId": "q151", "format": "json", "model": "claude-haiku-4-5", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 17414, "outputTokens": 6, "latencyMs": 1340 }, { "questionId": "q151", "format": "toon", "model": "gpt-4o-mini", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 8792, "outputTokens": 3, "latencyMs": 1443 }, { "questionId": "q151", "format": "toon", "model": "claude-haiku-4-5", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 9284, "outputTokens": 6, "latencyMs": 1732 }, { "questionId": "q151", "format": "csv", "model": "gpt-4o-mini", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 8560, "outputTokens": 3, "latencyMs": 1994 }, { "questionId": "q151", "format": "csv", "model": "claude-haiku-4-5", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 9130, "outputTokens": 6, "latencyMs": 1198 }, { "questionId": "q151", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 15485, "outputTokens": 3, "latencyMs": 5013 }, { "questionId": "q151", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 15372, "outputTokens": 6, "latencyMs": 1463 }, { "questionId": "q151", "format": "yaml", "model": "gpt-4o-mini", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 13175, "outputTokens": 3, "latencyMs": 1296 }, { "questionId": "q151", "format": "yaml", "model": "claude-haiku-4-5", "expected": "11763", "actual": "11763", "correct": true, "inputTokens": 14488, "outputTokens": 6, "latencyMs": 2877 }, { "questionId": "q152", "format": "json", "model": "gpt-4o-mini", "expected": "100", "actual": "0", "correct": false, "inputTokens": 15188, "outputTokens": 2, "latencyMs": 2160 }, { "questionId": "q152", "format": "json", "model": "claude-haiku-4-5", "expected": "100", "actual": "0", "correct": false, "inputTokens": 17406, "outputTokens": 5, "latencyMs": 1947 }, { "questionId": "q152", "format": "toon", "model": "gpt-4o-mini", "expected": "100", "actual": "0", "correct": false, "inputTokens": 8789, "outputTokens": 2, "latencyMs": 1222 }, { "questionId": "q152", "format": "toon", "model": "claude-haiku-4-5", "expected": "100", "actual": "0", "correct": false, "inputTokens": 9276, "outputTokens": 5, "latencyMs": 1487 }, { "questionId": "q152", "format": "csv", "model": "gpt-4o-mini", "expected": "100", "actual": "0", "correct": false, "inputTokens": 8557, "outputTokens": 2, "latencyMs": 1450 }, { "questionId": "q152", "format": "csv", "model": "claude-haiku-4-5", "expected": "100", "actual": "0", "correct": false, "inputTokens": 9122, "outputTokens": 5, "latencyMs": 1358 }, { "questionId": "q152", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "100", "actual": "0", "correct": false, "inputTokens": 15482, "outputTokens": 2, "latencyMs": 873 }, { "questionId": "q152", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "100", "actual": "100", "correct": true, "inputTokens": 15364, "outputTokens": 5, "latencyMs": 1500 }, { "questionId": "q152", "format": "yaml", "model": "gpt-4o-mini", "expected": "100", "actual": "0", "correct": false, "inputTokens": 13172, "outputTokens": 2, "latencyMs": 7031 }, { "questionId": "q152", "format": "yaml", "model": "claude-haiku-4-5", "expected": "100", "actual": "0", "correct": false, "inputTokens": 14480, "outputTokens": 5, "latencyMs": 1916 }, { "questionId": "q153", "format": "json", "model": "gpt-4o-mini", "expected": "15404143", "actual": "43115556", "correct": false, "inputTokens": 15189, "outputTokens": 4, "latencyMs": 3324 }, { "questionId": "q153", "format": "json", "model": "claude-haiku-4-5", "expected": "15404143", "actual": "13,847,892", "correct": false, "inputTokens": 17407, "outputTokens": 9, "latencyMs": 1607 }, { "questionId": "q153", "format": "toon", "model": "gpt-4o-mini", "expected": "15404143", "actual": "10419582", "correct": false, "inputTokens": 8790, "outputTokens": 4, "latencyMs": 900 }, { "questionId": "q153", "format": "toon", "model": "claude-haiku-4-5", "expected": "15404143", "actual": "13,847,892", "correct": false, "inputTokens": 9277, "outputTokens": 9, "latencyMs": 1385 }, { "questionId": "q153", "format": "csv", "model": "gpt-4o-mini", "expected": "15404143", "actual": "10419582", "correct": false, "inputTokens": 8558, "outputTokens": 4, "latencyMs": 1922 }, { "questionId": "q153", "format": "csv", "model": "claude-haiku-4-5", "expected": "15404143", "actual": "15,847,892", "correct": false, "inputTokens": 9123, "outputTokens": 9, "latencyMs": 1230 }, { "questionId": "q153", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "15404143", "actual": "10419580", "correct": false, "inputTokens": 15483, "outputTokens": 4, "latencyMs": 1716 }, { "questionId": "q153", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "15404143", "actual": "13,847,892", "correct": false, "inputTokens": 15365, "outputTokens": 9, "latencyMs": 1384 }, { "questionId": "q153", "format": "yaml", "model": "gpt-4o-mini", "expected": "15404143", "actual": "Total number of stars across all repositories is 4,978,155.", "correct": false, "inputTokens": 13173, "outputTokens": 16, "latencyMs": 3411 }, { "questionId": "q153", "format": "yaml", "model": "claude-haiku-4-5", "expected": "15404143", "actual": "13,847,892", "correct": false, "inputTokens": 14481, "outputTokens": 9, "latencyMs": 1539 }, { "questionId": "q154", "format": "json", "model": "gpt-4o-mini", "expected": "100", "actual": "77", "correct": false, "inputTokens": 15189, "outputTokens": 2, "latencyMs": 2523 }, { "questionId": "q154", "format": "json", "model": "claude-haiku-4-5", "expected": "100", "actual": "100", "correct": true, "inputTokens": 17408, "outputTokens": 5, "latencyMs": 1885 }, { "questionId": "q154", "format": "toon", "model": "gpt-4o-mini", "expected": "100", "actual": "42", "correct": false, "inputTokens": 8790, "outputTokens": 2, "latencyMs": 1148 }, { "questionId": "q154", "format": "toon", "model": "claude-haiku-4-5", "expected": "100", "actual": "100", "correct": true, "inputTokens": 9278, "outputTokens": 5, "latencyMs": 1378 }, { "questionId": "q154", "format": "csv", "model": "gpt-4o-mini", "expected": "100", "actual": "42", "correct": false, "inputTokens": 8558, "outputTokens": 2, "latencyMs": 1364 }, { "questionId": "q154", "format": "csv", "model": "claude-haiku-4-5", "expected": "100", "actual": "100", "correct": true, "inputTokens": 9124, "outputTokens": 5, "latencyMs": 1125 }, { "questionId": "q154", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "100", "actual": "104", "correct": false, "inputTokens": 15483, "outputTokens": 2, "latencyMs": 1276 }, { "questionId": "q154", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "100", "actual": "100", "correct": true, "inputTokens": 15366, "outputTokens": 5, "latencyMs": 1331 }, { "questionId": "q154", "format": "yaml", "model": "gpt-4o-mini", "expected": "100", "actual": "77", "correct": false, "inputTokens": 13173, "outputTokens": 2, "latencyMs": 1534 }, { "questionId": "q154", "format": "yaml", "model": "claude-haiku-4-5", "expected": "100", "actual": "100", "correct": true, "inputTokens": 14482, "outputTokens": 5, "latencyMs": 1282 }, { "questionId": "q155", "format": "json", "model": "gpt-4o-mini", "expected": "100", "actual": "19", "correct": false, "inputTokens": 15189, "outputTokens": 2, "latencyMs": 2206 }, { "questionId": "q155", "format": "json", "model": "claude-haiku-4-5", "expected": "100", "actual": "71", "correct": false, "inputTokens": 17408, "outputTokens": 5, "latencyMs": 1568 }, { "questionId": "q155", "format": "toon", "model": "gpt-4o-mini", "expected": "100", "actual": "15", "correct": false, "inputTokens": 8790, "outputTokens": 2, "latencyMs": 1478 }, { "questionId": "q155", "format": "toon", "model": "claude-haiku-4-5", "expected": "100", "actual": "42", "correct": false, "inputTokens": 9278, "outputTokens": 5, "latencyMs": 1314 }, { "questionId": "q155", "format": "csv", "model": "gpt-4o-mini", "expected": "100", "actual": "12", "correct": false, "inputTokens": 8558, "outputTokens": 2, "latencyMs": 2149 }, { "questionId": "q155", "format": "csv", "model": "claude-haiku-4-5", "expected": "100", "actual": "47", "correct": false, "inputTokens": 9124, "outputTokens": 5, "latencyMs": 1485 }, { "questionId": "q155", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "100", "actual": "34", "correct": false, "inputTokens": 15483, "outputTokens": 2, "latencyMs": 1043 }, { "questionId": "q155", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "100", "actual": "71", "correct": false, "inputTokens": 15366, "outputTokens": 5, "latencyMs": 1371 }, { "questionId": "q155", "format": "yaml", "model": "gpt-4o-mini", "expected": "100", "actual": "34", "correct": false, "inputTokens": 13173, "outputTokens": 2, "latencyMs": 1693 }, { "questionId": "q155", "format": "yaml", "model": "claude-haiku-4-5", "expected": "100", "actual": "71", "correct": false, "inputTokens": 14482, "outputTokens": 5, "latencyMs": 1237 }, { "questionId": "q156", "format": "json", "model": "gpt-4o-mini", "expected": "76", "actual": "82", "correct": false, "inputTokens": 15189, "outputTokens": 2, "latencyMs": 927 }, { "questionId": "q156", "format": "json", "model": "claude-haiku-4-5", "expected": "76", "actual": "100", "correct": false, "inputTokens": 17408, "outputTokens": 5, "latencyMs": 1274 }, { "questionId": "q156", "format": "toon", "model": "gpt-4o-mini", "expected": "76", "actual": "34", "correct": false, "inputTokens": 8790, "outputTokens": 2, "latencyMs": 2541 }, { "questionId": "q156", "format": "toon", "model": "claude-haiku-4-5", "expected": "76", "actual": "100", "correct": false, "inputTokens": 9278, "outputTokens": 5, "latencyMs": 1116 }, { "questionId": "q156", "format": "csv", "model": "gpt-4o-mini", "expected": "76", "actual": "34", "correct": false, "inputTokens": 8558, "outputTokens": 2, "latencyMs": 997 }, { "questionId": "q156", "format": "csv", "model": "claude-haiku-4-5", "expected": "76", "actual": "100", "correct": false, "inputTokens": 9124, "outputTokens": 5, "latencyMs": 1513 }, { "questionId": "q156", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "76", "actual": "104", "correct": false, "inputTokens": 15483, "outputTokens": 2, "latencyMs": 3168 }, { "questionId": "q156", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "76", "actual": "100", "correct": false, "inputTokens": 15366, "outputTokens": 5, "latencyMs": 1498 }, { "questionId": "q156", "format": "yaml", "model": "gpt-4o-mini", "expected": "76", "actual": "66", "correct": false, "inputTokens": 13173, "outputTokens": 2, "latencyMs": 1600 }, { "questionId": "q156", "format": "yaml", "model": "claude-haiku-4-5", "expected": "76", "actual": "100", "correct": false, "inputTokens": 14482, "outputTokens": 5, "latencyMs": 1519 }, { "questionId": "q157", "format": "json", "model": "gpt-4o-mini", "expected": "100", "actual": "77", "correct": false, "inputTokens": 15189, "outputTokens": 2, "latencyMs": 1809 }, { "questionId": "q157", "format": "json", "model": "claude-haiku-4-5", "expected": "100", "actual": "89", "correct": false, "inputTokens": 17409, "outputTokens": 5, "latencyMs": 1409 }, { "questionId": "q157", "format": "toon", "model": "gpt-4o-mini", "expected": "100", "actual": "66", "correct": false, "inputTokens": 8790, "outputTokens": 2, "latencyMs": 1367 }, { "questionId": "q157", "format": "toon", "model": "claude-haiku-4-5", "expected": "100", "actual": "73", "correct": false, "inputTokens": 9279, "outputTokens": 5, "latencyMs": 1296 }, { "questionId": "q157", "format": "csv", "model": "gpt-4o-mini", "expected": "100", "actual": "66", "correct": false, "inputTokens": 8558, "outputTokens": 2, "latencyMs": 1162 }, { "questionId": "q157", "format": "csv", "model": "claude-haiku-4-5", "expected": "100", "actual": "89", "correct": false, "inputTokens": 9125, "outputTokens": 5, "latencyMs": 1435 }, { "questionId": "q157", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "100", "actual": "77", "correct": false, "inputTokens": 15483, "outputTokens": 2, "latencyMs": 1774 }, { "questionId": "q157", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "100", "actual": "95", "correct": false, "inputTokens": 15367, "outputTokens": 5, "latencyMs": 1479 }, { "questionId": "q157", "format": "yaml", "model": "gpt-4o-mini", "expected": "100", "actual": "66", "correct": false, "inputTokens": 13173, "outputTokens": 2, "latencyMs": 2710 }, { "questionId": "q157", "format": "yaml", "model": "claude-haiku-4-5", "expected": "100", "actual": "95", "correct": false, "inputTokens": 14483, "outputTokens": 5, "latencyMs": 1272 }, { "questionId": "q158", "format": "json", "model": "gpt-4o-mini", "expected": "95", "actual": "42", "correct": false, "inputTokens": 15189, "outputTokens": 2, "latencyMs": 3038 }, { "questionId": "q158", "format": "json", "model": "claude-haiku-4-5", "expected": "95", "actual": "42", "correct": false, "inputTokens": 17409, "outputTokens": 5, "latencyMs": 1562 }, { "questionId": "q158", "format": "toon", "model": "gpt-4o-mini", "expected": "95", "actual": "38", "correct": false, "inputTokens": 8790, "outputTokens": 2, "latencyMs": 1536 }, { "questionId": "q158", "format": "toon", "model": "claude-haiku-4-5", "expected": "95", "actual": "42", "correct": false, "inputTokens": 9279, "outputTokens": 5, "latencyMs": 1216 }, { "questionId": "q158", "format": "csv", "model": "gpt-4o-mini", "expected": "95", "actual": "34", "correct": false, "inputTokens": 8558, "outputTokens": 2, "latencyMs": 1760 }, { "questionId": "q158", "format": "csv", "model": "claude-haiku-4-5", "expected": "95", "actual": "42", "correct": false, "inputTokens": 9125, "outputTokens": 5, "latencyMs": 1255 }, { "questionId": "q158", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "95", "actual": "66", "correct": false, "inputTokens": 15483, "outputTokens": 2, "latencyMs": 1683 }, { "questionId": "q158", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "95", "actual": "47", "correct": false, "inputTokens": 15367, "outputTokens": 5, "latencyMs": 2256 }, { "questionId": "q158", "format": "yaml", "model": "gpt-4o-mini", "expected": "95", "actual": "38", "correct": false, "inputTokens": 13173, "outputTokens": 2, "latencyMs": 2831 }, { "questionId": "q158", "format": "yaml", "model": "claude-haiku-4-5", "expected": "95", "actual": "47", "correct": false, "inputTokens": 14483, "outputTokens": 5, "latencyMs": 1980 }, { "questionId": "q159", "format": "json", "model": "gpt-4o-mini", "expected": "83", "actual": "66", "correct": false, "inputTokens": 15189, "outputTokens": 2, "latencyMs": 1327 }, { "questionId": "q159", "format": "json", "model": "claude-haiku-4-5", "expected": "83", "actual": "71", "correct": false, "inputTokens": 17409, "outputTokens": 5, "latencyMs": 1894 }, { "questionId": "q159", "format": "toon", "model": "gpt-4o-mini", "expected": "83", "actual": "34", "correct": false, "inputTokens": 8790, "outputTokens": 2, "latencyMs": 784 }, { "questionId": "q159", "format": "toon", "model": "claude-haiku-4-5", "expected": "83", "actual": "73", "correct": false, "inputTokens": 9279, "outputTokens": 5, "latencyMs": 1422 }, { "questionId": "q159", "format": "csv", "model": "gpt-4o-mini", "expected": "83", "actual": "34", "correct": false, "inputTokens": 8558, "outputTokens": 2, "latencyMs": 2644 }, { "questionId": "q159", "format": "csv", "model": "claude-haiku-4-5", "expected": "83", "actual": "73", "correct": false, "inputTokens": 9125, "outputTokens": 5, "latencyMs": 1109 }, { "questionId": "q159", "format": "markdown-kv", "model": "gpt-4o-mini", "expected": "83", "actual": "66", "correct": false, "inputTokens": 15483, "outputTokens": 2, "latencyMs": 1826 }, { "questionId": "q159", "format": "markdown-kv", "model": "claude-haiku-4-5", "expected": "83", "actual": "71", "correct": false, "inputTokens": 15367, "outputTokens": 5, "latencyMs": 1342 }, { "questionId": "q159", "format": "yaml", "model": "gpt-4o-mini", "expected": "83", "actual": "38", "correct": false, "inputTokens": 13173, "outputTokens": 2, "latencyMs": 2055 }, { "questionId": "q159", "format": "yaml", "model": "claude-haiku-4-5", "expected": "83", "actual": "71", "correct": false, "inputTokens": 14483, "outputTokens": 5, "latencyMs": 1537 } ]