Files
toon/benchmarks/results/accuracy/models/gpt-5-nano
2025-10-28 20:22:43 +01:00

1 line
134 KiB
Plaintext

[{"questionId":"q1","format":"json","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":6390,"outputTokens":72,"latencyMs":2286.895917},{"questionId":"q1","format":"toon","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":2527,"outputTokens":72,"latencyMs":2080.2120830000003},{"questionId":"q1","format":"csv","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":2381,"outputTokens":72,"latencyMs":2368.424333000001},{"questionId":"q1","format":"xml","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":7357,"outputTokens":136,"latencyMs":3603.5194579999998},{"questionId":"q1","format":"yaml","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":5012,"outputTokens":72,"latencyMs":2783.105292},{"questionId":"q2","format":"json","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":6390,"outputTokens":71,"latencyMs":3301.9922080000006},{"questionId":"q2","format":"toon","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2527,"outputTokens":71,"latencyMs":2579.989917000001},{"questionId":"q2","format":"csv","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2381,"outputTokens":71,"latencyMs":2071.0654589999995},{"questionId":"q2","format":"xml","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7357,"outputTokens":135,"latencyMs":2436.658125000001},{"questionId":"q2","format":"yaml","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5012,"outputTokens":71,"latencyMs":3412.149292},{"questionId":"q3","format":"json","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":6392,"outputTokens":140,"latencyMs":5299.290375},{"questionId":"q3","format":"toon","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":2529,"outputTokens":76,"latencyMs":2528.3222079999996},{"questionId":"q3","format":"csv","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":2383,"outputTokens":140,"latencyMs":3022.2497079999994},{"questionId":"q3","format":"xml","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":7359,"outputTokens":204,"latencyMs":3238.962124999999},{"questionId":"q3","format":"yaml","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":5014,"outputTokens":140,"latencyMs":2557.434041999999},{"questionId":"q4","format":"json","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":6392,"outputTokens":71,"latencyMs":3143.1138339999998},{"questionId":"q4","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2529,"outputTokens":71,"latencyMs":2368.6757910000006},{"questionId":"q4","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2383,"outputTokens":71,"latencyMs":2801.3656659999997},{"questionId":"q4","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":7359,"outputTokens":199,"latencyMs":3047.416791999999},{"questionId":"q4","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":5014,"outputTokens":71,"latencyMs":3128.1965420000006},{"questionId":"q5","format":"json","model":"gpt-5-nano","expected":"no","actual":"No","isCorrect":true,"inputTokens":6388,"outputTokens":199,"latencyMs":2972.251875},{"questionId":"q5","format":"toon","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":2525,"outputTokens":327,"latencyMs":4319.013167000001},{"questionId":"q5","format":"csv","model":"gpt-5-nano","expected":"no","actual":"No","isCorrect":true,"inputTokens":2379,"outputTokens":647,"latencyMs":8503.479375},{"questionId":"q5","format":"xml","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":7355,"outputTokens":263,"latencyMs":3787.869708},{"questionId":"q5","format":"yaml","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":5010,"outputTokens":327,"latencyMs":4215.784416999999},{"questionId":"q6","format":"json","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":6389,"outputTokens":72,"latencyMs":2708.392833},{"questionId":"q6","format":"toon","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":2526,"outputTokens":72,"latencyMs":2004.1692079999993},{"questionId":"q6","format":"csv","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":2380,"outputTokens":136,"latencyMs":2530.7687079999996},{"questionId":"q6","format":"xml","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":7356,"outputTokens":136,"latencyMs":2244.525791},{"questionId":"q6","format":"yaml","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":5011,"outputTokens":136,"latencyMs":2472.8984170000003},{"questionId":"q7","format":"json","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":6390,"outputTokens":7,"latencyMs":1896.0880000000016},{"questionId":"q7","format":"toon","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2527,"outputTokens":71,"latencyMs":2263.058832999999},{"questionId":"q7","format":"csv","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2381,"outputTokens":71,"latencyMs":2069.6880410000012},{"questionId":"q7","format":"xml","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7357,"outputTokens":135,"latencyMs":2421.5882500000007},{"questionId":"q7","format":"yaml","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5012,"outputTokens":71,"latencyMs":2004.3543750000008},{"questionId":"q8","format":"json","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":6391,"outputTokens":77,"latencyMs":2184.6345},{"questionId":"q8","format":"toon","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":2528,"outputTokens":141,"latencyMs":3463.506875000001},{"questionId":"q8","format":"csv","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":2382,"outputTokens":141,"latencyMs":2491.552375000001},{"questionId":"q8","format":"xml","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":7358,"outputTokens":141,"latencyMs":2773.072124999999},{"questionId":"q8","format":"yaml","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":5013,"outputTokens":77,"latencyMs":3364.1551249999993},{"questionId":"q9","format":"json","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":6393,"outputTokens":263,"latencyMs":8407.982375000001},{"questionId":"q9","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":2530,"outputTokens":135,"latencyMs":3553.8328330000004},{"questionId":"q9","format":"csv","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":2384,"outputTokens":455,"latencyMs":8108.107749999997},{"questionId":"q9","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":7360,"outputTokens":199,"latencyMs":7765.270042},{"questionId":"q9","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5015,"outputTokens":135,"latencyMs":2643.0200829999994},{"questionId":"q10","format":"json","model":"gpt-5-nano","expected":"yes","actual":"Yes","isCorrect":true,"inputTokens":6390,"outputTokens":135,"latencyMs":3184.6242919999986},{"questionId":"q10","format":"toon","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":2527,"outputTokens":263,"latencyMs":3714.655332999999},{"questionId":"q10","format":"csv","model":"gpt-5-nano","expected":"yes","actual":"Yes","isCorrect":true,"inputTokens":2381,"outputTokens":135,"latencyMs":2412.2727080000004},{"questionId":"q10","format":"xml","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7357,"outputTokens":391,"latencyMs":4610.666667000001},{"questionId":"q10","format":"yaml","model":"gpt-5-nano","expected":"yes","actual":"Yes","isCorrect":true,"inputTokens":5012,"outputTokens":135,"latencyMs":2337.4404170000016},{"questionId":"q11","format":"json","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":6390,"outputTokens":264,"latencyMs":3417.8135},{"questionId":"q11","format":"toon","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":2527,"outputTokens":200,"latencyMs":2675.2862499999974},{"questionId":"q11","format":"csv","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":2381,"outputTokens":136,"latencyMs":4805.800959},{"questionId":"q11","format":"xml","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":7357,"outputTokens":136,"latencyMs":2649.596416999997},{"questionId":"q11","format":"yaml","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":5012,"outputTokens":72,"latencyMs":2322.7847089999996},{"questionId":"q12","format":"json","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":6388,"outputTokens":263,"latencyMs":4128.735457999999},{"questionId":"q12","format":"toon","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2525,"outputTokens":71,"latencyMs":2963.8491250000006},{"questionId":"q12","format":"csv","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2379,"outputTokens":71,"latencyMs":3226.5830000000024},{"questionId":"q12","format":"xml","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":7355,"outputTokens":135,"latencyMs":3400.928915999997},{"questionId":"q12","format":"yaml","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5010,"outputTokens":711,"latencyMs":8393.479792000002},{"questionId":"q13","format":"json","model":"gpt-5-nano","expected":"henderson70@yahoo.com","actual":"henderson70@yahoo.com","isCorrect":true,"inputTokens":6389,"outputTokens":75,"latencyMs":2083.456707999998},{"questionId":"q13","format":"toon","model":"gpt-5-nano","expected":"henderson70@yahoo.com","actual":"henderson70@yahoo.com","isCorrect":true,"inputTokens":2526,"outputTokens":75,"latencyMs":2282.820208000001},{"questionId":"q13","format":"csv","model":"gpt-5-nano","expected":"henderson70@yahoo.com","actual":"henderson70@yahoo.com","isCorrect":true,"inputTokens":2380,"outputTokens":139,"latencyMs":1922.3527920000015},{"questionId":"q13","format":"xml","model":"gpt-5-nano","expected":"henderson70@yahoo.com","actual":"henderson70@yahoo.com","isCorrect":true,"inputTokens":7356,"outputTokens":139,"latencyMs":1967.7009160000016},{"questionId":"q13","format":"yaml","model":"gpt-5-nano","expected":"henderson70@yahoo.com","actual":"henderson70@yahoo.com","isCorrect":true,"inputTokens":5011,"outputTokens":75,"latencyMs":2097.907542000001},{"questionId":"q14","format":"json","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":6391,"outputTokens":135,"latencyMs":3816.0825000000004},{"questionId":"q14","format":"toon","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":2528,"outputTokens":71,"latencyMs":1841.6428339999984},{"questionId":"q14","format":"csv","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":2382,"outputTokens":135,"latencyMs":2661.6788750000014},{"questionId":"q14","format":"xml","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":7358,"outputTokens":135,"latencyMs":3028.1100410000035},{"questionId":"q14","format":"yaml","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":5013,"outputTokens":135,"latencyMs":2456.2266249999993},{"questionId":"q15","format":"json","model":"gpt-5-nano","expected":"yes","actual":"Yes","isCorrect":true,"inputTokens":6388,"outputTokens":135,"latencyMs":2595.8724580000016},{"questionId":"q15","format":"toon","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":2525,"outputTokens":199,"latencyMs":3002.6034579999978},{"questionId":"q15","format":"csv","model":"gpt-5-nano","expected":"yes","actual":"Yes","isCorrect":true,"inputTokens":2379,"outputTokens":263,"latencyMs":3817.756000000001},{"questionId":"q15","format":"xml","model":"gpt-5-nano","expected":"yes","actual":"Yes","isCorrect":true,"inputTokens":7355,"outputTokens":455,"latencyMs":4972.323082999999},{"questionId":"q15","format":"yaml","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5010,"outputTokens":327,"latencyMs":7745.852374999999},{"questionId":"q16","format":"json","model":"gpt-5-nano","expected":"89436","actual":"89436","isCorrect":true,"inputTokens":6389,"outputTokens":72,"latencyMs":2094.709333999999},{"questionId":"q16","format":"toon","model":"gpt-5-nano","expected":"89436","actual":"89436","isCorrect":true,"inputTokens":2526,"outputTokens":72,"latencyMs":3989.400916999999},{"questionId":"q16","format":"csv","model":"gpt-5-nano","expected":"89436","actual":"89436","isCorrect":true,"inputTokens":2380,"outputTokens":72,"latencyMs":1999.0430420000012},{"questionId":"q16","format":"xml","model":"gpt-5-nano","expected":"89436","actual":"89436","isCorrect":true,"inputTokens":7356,"outputTokens":136,"latencyMs":3469.017167000002},{"questionId":"q16","format":"yaml","model":"gpt-5-nano","expected":"89436","actual":"89436","isCorrect":true,"inputTokens":5011,"outputTokens":136,"latencyMs":2959.3207089999996},{"questionId":"q17","format":"json","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":6392,"outputTokens":135,"latencyMs":4353.834665999999},{"questionId":"q17","format":"toon","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2529,"outputTokens":135,"latencyMs":2734.705167},{"questionId":"q17","format":"csv","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2383,"outputTokens":135,"latencyMs":5485.966791999999},{"questionId":"q17","format":"xml","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7359,"outputTokens":135,"latencyMs":3338.3081660000025},{"questionId":"q17","format":"yaml","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5014,"outputTokens":135,"latencyMs":2317.4250410000022},{"questionId":"q18","format":"json","model":"gpt-5-nano","expected":"kelvin54@yahoo.com","actual":"kelvin54@yahoo.com","isCorrect":true,"inputTokens":6390,"outputTokens":139,"latencyMs":2934.8541250000017},{"questionId":"q18","format":"toon","model":"gpt-5-nano","expected":"kelvin54@yahoo.com","actual":"kelvin54@yahoo.com","isCorrect":true,"inputTokens":2527,"outputTokens":75,"latencyMs":2196.355125000002},{"questionId":"q18","format":"csv","model":"gpt-5-nano","expected":"kelvin54@yahoo.com","actual":"kelvin54@yahoo.com","isCorrect":true,"inputTokens":2381,"outputTokens":75,"latencyMs":2179.3174580000014},{"questionId":"q18","format":"xml","model":"gpt-5-nano","expected":"kelvin54@yahoo.com","actual":"kelvin54@yahoo.com","isCorrect":true,"inputTokens":7357,"outputTokens":203,"latencyMs":2986.970416},{"questionId":"q18","format":"yaml","model":"gpt-5-nano","expected":"kelvin54@yahoo.com","actual":"kelvin54@yahoo.com","isCorrect":true,"inputTokens":5012,"outputTokens":139,"latencyMs":2035.5609160000022},{"questionId":"q19","format":"json","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6392,"outputTokens":135,"latencyMs":2827.9320420000004},{"questionId":"q19","format":"toon","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":2529,"outputTokens":71,"latencyMs":2052.042333999998},{"questionId":"q19","format":"csv","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":2383,"outputTokens":135,"latencyMs":2475.6582089999974},{"questionId":"q19","format":"xml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":7359,"outputTokens":199,"latencyMs":5298.210291999996},{"questionId":"q19","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":5014,"outputTokens":71,"latencyMs":2479.8611249999994},{"questionId":"q20","format":"json","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":6388,"outputTokens":199,"latencyMs":3099.4663340000043},{"questionId":"q20","format":"toon","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":2525,"outputTokens":327,"latencyMs":4842.604750000002},{"questionId":"q20","format":"csv","model":"gpt-5-nano","expected":"yes","actual":"Yes","isCorrect":true,"inputTokens":2379,"outputTokens":135,"latencyMs":2375.8693330000024},{"questionId":"q20","format":"xml","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7355,"outputTokens":199,"latencyMs":3211.3723340000033},{"questionId":"q20","format":"yaml","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5010,"outputTokens":135,"latencyMs":3330.7180000000008},{"questionId":"q21","format":"json","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6387,"outputTokens":647,"latencyMs":7148.650417000001},{"questionId":"q21","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2524,"outputTokens":1607,"latencyMs":15327.959125000001},{"questionId":"q21","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2378,"outputTokens":967,"latencyMs":10992.290750000004},{"questionId":"q21","format":"xml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":7354,"outputTokens":1031,"latencyMs":9394.927084000003},{"questionId":"q21","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5009,"outputTokens":903,"latencyMs":10763.375417000003},{"questionId":"q22","format":"json","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6387,"outputTokens":391,"latencyMs":4349.884417000001},{"questionId":"q22","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2524,"outputTokens":1095,"latencyMs":9809.553958000004},{"questionId":"q22","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2378,"outputTokens":1031,"latencyMs":9584.158749999995},{"questionId":"q22","format":"xml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":7354,"outputTokens":519,"latencyMs":5500.127124999999},{"questionId":"q22","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5009,"outputTokens":839,"latencyMs":8069.941374999995},{"questionId":"q23","format":"json","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6387,"outputTokens":647,"latencyMs":6670.407958000003},{"questionId":"q23","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2524,"outputTokens":1031,"latencyMs":9428.577291000001},{"questionId":"q23","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2378,"outputTokens":647,"latencyMs":6800.205249999999},{"questionId":"q23","format":"xml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":7354,"outputTokens":903,"latencyMs":9085.086500000005},{"questionId":"q23","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5009,"outputTokens":1031,"latencyMs":10963.525583000002},{"questionId":"q24","format":"json","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6387,"outputTokens":647,"latencyMs":6168.287916999994},{"questionId":"q24","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2524,"outputTokens":455,"latencyMs":5222.8764999999985},{"questionId":"q24","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2378,"outputTokens":967,"latencyMs":9628.338166000001},{"questionId":"q24","format":"xml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":7354,"outputTokens":967,"latencyMs":8964.717292000001},{"questionId":"q24","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"16","isCorrect":false,"inputTokens":5009,"outputTokens":583,"latencyMs":5695.531999999999},{"questionId":"q25","format":"json","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":6387,"outputTokens":455,"latencyMs":4840.368499999997},{"questionId":"q25","format":"toon","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":2524,"outputTokens":711,"latencyMs":8578.791709000005},{"questionId":"q25","format":"csv","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":2378,"outputTokens":1479,"latencyMs":15123.943},{"questionId":"q25","format":"xml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":7354,"outputTokens":967,"latencyMs":9384.946625000004},{"questionId":"q25","format":"yaml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":5009,"outputTokens":839,"latencyMs":10944.875042},{"questionId":"q26","format":"json","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":6387,"outputTokens":647,"latencyMs":6869.418540999999},{"questionId":"q26","format":"toon","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":2524,"outputTokens":455,"latencyMs":4867.974583000003},{"questionId":"q26","format":"csv","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":2378,"outputTokens":1543,"latencyMs":14267.353582999996},{"questionId":"q26","format":"xml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":7354,"outputTokens":647,"latencyMs":7383.540583000002},{"questionId":"q26","format":"yaml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":5009,"outputTokens":775,"latencyMs":14026.064583},{"questionId":"q27","format":"json","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":6392,"outputTokens":2119,"latencyMs":19840.095458000003},{"questionId":"q27","format":"toon","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":2529,"outputTokens":1863,"latencyMs":19581.628542},{"questionId":"q27","format":"csv","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":2383,"outputTokens":1863,"latencyMs":17144.098332999994},{"questionId":"q27","format":"xml","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":7359,"outputTokens":2375,"latencyMs":22871.458750000005},{"questionId":"q27","format":"yaml","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":5014,"outputTokens":2503,"latencyMs":24232.040250000005},{"questionId":"q28","format":"json","model":"gpt-5-nano","expected":"67","actual":"68","isCorrect":false,"inputTokens":6392,"outputTokens":2695,"latencyMs":22657.479165999997},{"questionId":"q28","format":"toon","model":"gpt-5-nano","expected":"67","actual":"67","isCorrect":true,"inputTokens":2529,"outputTokens":1607,"latencyMs":13011.923875},{"questionId":"q28","format":"csv","model":"gpt-5-nano","expected":"67","actual":"64","isCorrect":false,"inputTokens":2383,"outputTokens":1927,"latencyMs":17143.839250000005},{"questionId":"q28","format":"xml","model":"gpt-5-nano","expected":"67","actual":"66","isCorrect":false,"inputTokens":7359,"outputTokens":2119,"latencyMs":19857.302667000004},{"questionId":"q28","format":"yaml","model":"gpt-5-nano","expected":"67","actual":"67","isCorrect":true,"inputTokens":5014,"outputTokens":1799,"latencyMs":17493.660707999996},{"questionId":"q29","format":"json","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":6392,"outputTokens":1543,"latencyMs":13661.939208000003},{"questionId":"q29","format":"toon","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":2529,"outputTokens":1415,"latencyMs":13394.808249999995},{"questionId":"q29","format":"csv","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":2383,"outputTokens":1863,"latencyMs":16580.891334000007},{"questionId":"q29","format":"xml","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":7359,"outputTokens":1543,"latencyMs":14548.037708000003},{"questionId":"q29","format":"yaml","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":5014,"outputTokens":1671,"latencyMs":14537.892209000005},{"questionId":"q30","format":"json","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":6392,"outputTokens":1159,"latencyMs":11617.139958},{"questionId":"q30","format":"toon","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":2529,"outputTokens":1671,"latencyMs":17613.913875},{"questionId":"q30","format":"csv","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":2383,"outputTokens":1287,"latencyMs":9721.494916999996},{"questionId":"q30","format":"xml","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":7359,"outputTokens":1543,"latencyMs":14938.151124999997},{"questionId":"q30","format":"yaml","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":5014,"outputTokens":1543,"latencyMs":15495.643333},{"questionId":"q31","format":"json","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":6388,"outputTokens":71,"latencyMs":1938.4781660000008},{"questionId":"q31","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2525,"outputTokens":71,"latencyMs":2216.999291},{"questionId":"q31","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2379,"outputTokens":135,"latencyMs":2948.765041999999},{"questionId":"q31","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":7355,"outputTokens":135,"latencyMs":3050.746583999993},{"questionId":"q31","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":5010,"outputTokens":71,"latencyMs":1862.2132500000007},{"questionId":"q32","format":"json","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":false,"inputTokens":6389,"outputTokens":4426,"latencyMs":38802.84825000001},{"questionId":"q32","format":"toon","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":false,"inputTokens":2526,"outputTokens":4874,"latencyMs":39527.296707999994},{"questionId":"q32","format":"csv","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":false,"inputTokens":2380,"outputTokens":3466,"latencyMs":31568.755042000004},{"questionId":"q32","format":"xml","model":"gpt-5-nano","expected":"96503","actual":"97075.91","isCorrect":false,"inputTokens":7356,"outputTokens":7946,"latencyMs":71846.78920900001},{"questionId":"q32","format":"yaml","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":false,"inputTokens":5011,"outputTokens":3210,"latencyMs":29167.25637500001},{"questionId":"q33","format":"json","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":6386,"outputTokens":3079,"latencyMs":27806.129750000007},{"questionId":"q33","format":"toon","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":2523,"outputTokens":1287,"latencyMs":11461.352291999996},{"questionId":"q33","format":"csv","model":"gpt-5-nano","expected":"78","actual":"79","isCorrect":false,"inputTokens":2377,"outputTokens":3079,"latencyMs":28779.471042000005},{"questionId":"q33","format":"xml","model":"gpt-5-nano","expected":"78","actual":"77","isCorrect":false,"inputTokens":7353,"outputTokens":1095,"latencyMs":11862.612083},{"questionId":"q33","format":"yaml","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":5008,"outputTokens":1671,"latencyMs":16546.208209000004},{"questionId":"q34","format":"json","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":6386,"outputTokens":1287,"latencyMs":12874.773583000002},{"questionId":"q34","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2523,"outputTokens":967,"latencyMs":10128.866540999996},{"questionId":"q34","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2377,"outputTokens":2567,"latencyMs":21328.398541999995},{"questionId":"q34","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":7353,"outputTokens":1223,"latencyMs":10527.548834000001},{"questionId":"q34","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":5008,"outputTokens":1159,"latencyMs":10514.372334},{"questionId":"q35","format":"json","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":6394,"outputTokens":1671,"latencyMs":16690.553291999997},{"questionId":"q35","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2531,"outputTokens":1543,"latencyMs":18155.74162500001},{"questionId":"q35","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2385,"outputTokens":2247,"latencyMs":19133.287500000006},{"questionId":"q35","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7361,"outputTokens":1287,"latencyMs":14527.046083000008},{"questionId":"q35","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5016,"outputTokens":1031,"latencyMs":11708.512457999997},{"questionId":"q36","format":"json","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6394,"outputTokens":1223,"latencyMs":15122.788833999992},{"questionId":"q36","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2531,"outputTokens":839,"latencyMs":10423.516166000001},{"questionId":"q36","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2385,"outputTokens":1735,"latencyMs":15695.157332999996},{"questionId":"q36","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7361,"outputTokens":1415,"latencyMs":13899.425959},{"questionId":"q36","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5016,"outputTokens":1159,"latencyMs":11514.098790999997},{"questionId":"q37","format":"json","model":"gpt-5-nano","expected":"11","actual":"10","isCorrect":false,"inputTokens":6394,"outputTokens":1287,"latencyMs":13009.814249999996},{"questionId":"q37","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2531,"outputTokens":1543,"latencyMs":15871.650417000012},{"questionId":"q37","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2385,"outputTokens":967,"latencyMs":9735.937375000009},{"questionId":"q37","format":"xml","model":"gpt-5-nano","expected":"11","actual":"10","isCorrect":false,"inputTokens":7361,"outputTokens":2055,"latencyMs":20242.728875},{"questionId":"q37","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5016,"outputTokens":1671,"latencyMs":16286.792667000002},{"questionId":"q38","format":"json","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":6394,"outputTokens":2183,"latencyMs":21316.307375000004},{"questionId":"q38","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2531,"outputTokens":2439,"latencyMs":24585.012208999993},{"questionId":"q38","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2385,"outputTokens":1287,"latencyMs":15640.584124999994},{"questionId":"q38","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7361,"outputTokens":1415,"latencyMs":13889.092875000002},{"questionId":"q38","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5016,"outputTokens":1159,"latencyMs":11549.750583000001},{"questionId":"q39","format":"json","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6394,"outputTokens":583,"latencyMs":6874.004750000007},{"questionId":"q39","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2531,"outputTokens":1031,"latencyMs":14618.748875000005},{"questionId":"q39","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2385,"outputTokens":2055,"latencyMs":42524.04500000001},{"questionId":"q39","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7361,"outputTokens":1095,"latencyMs":10262.768083999996},{"questionId":"q39","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5016,"outputTokens":1415,"latencyMs":13156.821458000006},{"questionId":"q40","format":"json","model":"gpt-5-nano","expected":"10","actual":"8","isCorrect":false,"inputTokens":6394,"outputTokens":1351,"latencyMs":15696.610916999998},{"questionId":"q40","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":2531,"outputTokens":1415,"latencyMs":15140.198166000002},{"questionId":"q40","format":"csv","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":2385,"outputTokens":1351,"latencyMs":20472.353375000006},{"questionId":"q40","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7361,"outputTokens":2183,"latencyMs":23243.26454100001},{"questionId":"q40","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"9","isCorrect":false,"inputTokens":5016,"outputTokens":1543,"latencyMs":17841.989625000002},{"questionId":"q41","format":"json","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":6393,"outputTokens":2695,"latencyMs":23743.842332999993},{"questionId":"q41","format":"toon","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":2530,"outputTokens":2631,"latencyMs":27546.34533299999},{"questionId":"q41","format":"csv","model":"gpt-5-nano","expected":"63","actual":"65","isCorrect":false,"inputTokens":2384,"outputTokens":4039,"latencyMs":42146.063124999986},{"questionId":"q41","format":"xml","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":7360,"outputTokens":4871,"latencyMs":37767.37599999999},{"questionId":"q41","format":"yaml","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":5015,"outputTokens":2439,"latencyMs":25553.095333999998},{"questionId":"q42","format":"json","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":6393,"outputTokens":2375,"latencyMs":22442.41562500001},{"questionId":"q42","format":"toon","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":2530,"outputTokens":2567,"latencyMs":25569.86658300001},{"questionId":"q42","format":"csv","model":"gpt-5-nano","expected":"53","actual":"54","isCorrect":false,"inputTokens":2384,"outputTokens":3079,"latencyMs":25882.737875000006},{"questionId":"q42","format":"xml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":7360,"outputTokens":2695,"latencyMs":28840.383208000014},{"questionId":"q42","format":"yaml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":5015,"outputTokens":2439,"latencyMs":25784.016457999984},{"questionId":"q43","format":"json","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":6393,"outputTokens":2183,"latencyMs":20179.226250000007},{"questionId":"q43","format":"toon","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":2530,"outputTokens":2567,"latencyMs":25615.354624999993},{"questionId":"q43","format":"csv","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":2384,"outputTokens":3335,"latencyMs":25506.231792000006},{"questionId":"q43","format":"xml","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":7360,"outputTokens":2503,"latencyMs":19985.533500000005},{"questionId":"q43","format":"yaml","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":5015,"outputTokens":2631,"latencyMs":22299.584208},{"questionId":"q44","format":"json","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":6393,"outputTokens":2311,"latencyMs":24179.406917},{"questionId":"q44","format":"toon","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":2530,"outputTokens":2375,"latencyMs":19273.154207999993},{"questionId":"q44","format":"csv","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":2384,"outputTokens":2567,"latencyMs":25190.493749999994},{"questionId":"q44","format":"xml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":7360,"outputTokens":1991,"latencyMs":16888.5325},{"questionId":"q44","format":"yaml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":5015,"outputTokens":1799,"latencyMs":16030.792166999978},{"questionId":"q45","format":"json","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6394,"outputTokens":1287,"latencyMs":12991.277834000008},{"questionId":"q45","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2531,"outputTokens":1351,"latencyMs":13087.36054199998},{"questionId":"q45","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2385,"outputTokens":1991,"latencyMs":18446.735167000006},{"questionId":"q45","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7361,"outputTokens":1607,"latencyMs":10865.307417000004},{"questionId":"q45","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5016,"outputTokens":1159,"latencyMs":14611.011917000025},{"questionId":"q46","format":"json","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":6394,"outputTokens":967,"latencyMs":7963.411249999976},{"questionId":"q46","format":"toon","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":2531,"outputTokens":1287,"latencyMs":13350.95537499999},{"questionId":"q46","format":"csv","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":2385,"outputTokens":1223,"latencyMs":12941.291666999983},{"questionId":"q46","format":"xml","model":"gpt-5-nano","expected":"8","actual":"7","isCorrect":false,"inputTokens":7361,"outputTokens":2055,"latencyMs":19121.181458999985},{"questionId":"q46","format":"yaml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":5016,"outputTokens":839,"latencyMs":11689.81270899999},{"questionId":"q47","format":"json","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":6394,"outputTokens":1735,"latencyMs":12743.134749999997},{"questionId":"q47","format":"toon","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":2531,"outputTokens":775,"latencyMs":8221.038832999999},{"questionId":"q47","format":"csv","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":2385,"outputTokens":775,"latencyMs":8083.291667000012},{"questionId":"q47","format":"xml","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":7361,"outputTokens":1031,"latencyMs":12041.053416999988},{"questionId":"q47","format":"yaml","model":"gpt-5-nano","expected":"15","actual":"14","isCorrect":false,"inputTokens":5016,"outputTokens":1159,"latencyMs":12225.70216700001},{"questionId":"q48","format":"json","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":6388,"outputTokens":1031,"latencyMs":10024.215874999994},{"questionId":"q48","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2525,"outputTokens":1287,"latencyMs":12129.384416000015},{"questionId":"q48","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2379,"outputTokens":3335,"latencyMs":17532.385958},{"questionId":"q48","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7355,"outputTokens":1287,"latencyMs":14163.686583000002},{"questionId":"q48","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5010,"outputTokens":1031,"latencyMs":10721.844666999998},{"questionId":"q49","format":"json","model":"gpt-5-nano","expected":"11","actual":"10","isCorrect":false,"inputTokens":6388,"outputTokens":647,"latencyMs":10175.196499999991},{"questionId":"q49","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2525,"outputTokens":1095,"latencyMs":8359.25920900001},{"questionId":"q49","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2379,"outputTokens":1671,"latencyMs":19210.797334000003},{"questionId":"q49","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7355,"outputTokens":839,"latencyMs":9501.392666999978},{"questionId":"q49","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5010,"outputTokens":903,"latencyMs":10511.122625000018},{"questionId":"q50","format":"json","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":6388,"outputTokens":839,"latencyMs":7034.4220000000205},{"questionId":"q50","format":"toon","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":2525,"outputTokens":903,"latencyMs":9088.10166700001},{"questionId":"q50","format":"csv","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":2379,"outputTokens":1479,"latencyMs":13106.483208999998},{"questionId":"q50","format":"xml","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":7355,"outputTokens":1223,"latencyMs":12101.726083999994},{"questionId":"q50","format":"yaml","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":5010,"outputTokens":1799,"latencyMs":17414.184500000003},{"questionId":"q51","format":"json","model":"gpt-5-nano","expected":"96.17","actual":"96.17","isCorrect":true,"inputTokens":9738,"outputTokens":73,"latencyMs":3038.685334000009},{"questionId":"q51","format":"toon","model":"gpt-5-nano","expected":"96.17","actual":"96.17","isCorrect":true,"inputTokens":6012,"outputTokens":73,"latencyMs":2160.960334000003},{"questionId":"q51","format":"csv","model":"gpt-5-nano","expected":"96.17","actual":"96.17","isCorrect":true,"inputTokens":6780,"outputTokens":137,"latencyMs":2365.200749999989},{"questionId":"q51","format":"xml","model":"gpt-5-nano","expected":"96.17","actual":"96.17","isCorrect":true,"inputTokens":11036,"outputTokens":137,"latencyMs":1989.758124999993},{"questionId":"q51","format":"yaml","model":"gpt-5-nano","expected":"96.17","actual":"96.17","isCorrect":true,"inputTokens":7372,"outputTokens":137,"latencyMs":2664.6698329999927},{"questionId":"q52","format":"json","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":9738,"outputTokens":136,"latencyMs":3202.772499999992},{"questionId":"q52","format":"toon","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":6012,"outputTokens":136,"latencyMs":2305.1638749999984},{"questionId":"q52","format":"csv","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":6780,"outputTokens":200,"latencyMs":3069.4525830000057},{"questionId":"q52","format":"xml","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":11036,"outputTokens":200,"latencyMs":2304.95974999998},{"questionId":"q52","format":"yaml","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":7372,"outputTokens":136,"latencyMs":2077.190957999992},{"questionId":"q53","format":"json","model":"gpt-5-nano","expected":"599.39","actual":"599.39","isCorrect":true,"inputTokens":9738,"outputTokens":265,"latencyMs":3420.801832999976},{"questionId":"q53","format":"toon","model":"gpt-5-nano","expected":"599.39","actual":"599.39","isCorrect":true,"inputTokens":6012,"outputTokens":201,"latencyMs":2733.4107920000097},{"questionId":"q53","format":"csv","model":"gpt-5-nano","expected":"599.39","actual":"599.39","isCorrect":true,"inputTokens":6780,"outputTokens":265,"latencyMs":3371.902375000005},{"questionId":"q53","format":"xml","model":"gpt-5-nano","expected":"599.39","actual":"599.39","isCorrect":true,"inputTokens":11036,"outputTokens":329,"latencyMs":2736.295167000004},{"questionId":"q53","format":"yaml","model":"gpt-5-nano","expected":"599.39","actual":"599.39","isCorrect":true,"inputTokens":7372,"outputTokens":201,"latencyMs":3164.7157080000034},{"questionId":"q54","format":"json","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":9738,"outputTokens":199,"latencyMs":2172.5717090000107},{"questionId":"q54","format":"toon","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":6012,"outputTokens":135,"latencyMs":2345.0319169999857},{"questionId":"q54","format":"csv","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":6780,"outputTokens":135,"latencyMs":3713.114291999984},{"questionId":"q54","format":"xml","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":11036,"outputTokens":391,"latencyMs":6108.840708000003},{"questionId":"q54","format":"yaml","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":7372,"outputTokens":199,"latencyMs":3263.157750000013},{"questionId":"q55","format":"json","model":"gpt-5-nano","expected":"528.71","actual":"528.71","isCorrect":true,"inputTokens":9738,"outputTokens":265,"latencyMs":3599.183208000002},{"questionId":"q55","format":"toon","model":"gpt-5-nano","expected":"528.71","actual":"528.71","isCorrect":true,"inputTokens":6012,"outputTokens":265,"latencyMs":3806.1117080000113},{"questionId":"q55","format":"csv","model":"gpt-5-nano","expected":"528.71","actual":"528.71","isCorrect":true,"inputTokens":6780,"outputTokens":137,"latencyMs":2482.1311250000144},{"questionId":"q55","format":"xml","model":"gpt-5-nano","expected":"528.71","actual":"528.71","isCorrect":true,"inputTokens":11036,"outputTokens":457,"latencyMs":4714.9086669999815},{"questionId":"q55","format":"yaml","model":"gpt-5-nano","expected":"528.71","actual":"528.71","isCorrect":true,"inputTokens":7372,"outputTokens":265,"latencyMs":3542.246542000008},{"questionId":"q56","format":"json","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":9738,"outputTokens":199,"latencyMs":4117.672166999982},{"questionId":"q56","format":"toon","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":6012,"outputTokens":263,"latencyMs":3441.915166999999},{"questionId":"q56","format":"csv","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":6780,"outputTokens":263,"latencyMs":8454.847999999998},{"questionId":"q56","format":"xml","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":11036,"outputTokens":199,"latencyMs":2997.5},{"questionId":"q56","format":"yaml","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":7372,"outputTokens":199,"latencyMs":3116.710000000021},{"questionId":"q57","format":"json","model":"gpt-5-nano","expected":"1687.82","actual":"1687.82","isCorrect":true,"inputTokens":9738,"outputTokens":266,"latencyMs":3084.641333000007},{"questionId":"q57","format":"toon","model":"gpt-5-nano","expected":"1687.82","actual":"1687.82","isCorrect":true,"inputTokens":6012,"outputTokens":202,"latencyMs":3517.2125410000153},{"questionId":"q57","format":"csv","model":"gpt-5-nano","expected":"1687.82","actual":"1687.82","isCorrect":true,"inputTokens":6780,"outputTokens":394,"latencyMs":2861.477082999976},{"questionId":"q57","format":"xml","model":"gpt-5-nano","expected":"1687.82","actual":"1687.82","isCorrect":true,"inputTokens":11036,"outputTokens":330,"latencyMs":4378.942290999985},{"questionId":"q57","format":"yaml","model":"gpt-5-nano","expected":"1687.82","actual":"1687.82","isCorrect":true,"inputTokens":7372,"outputTokens":266,"latencyMs":3748.4990410000028},{"questionId":"q58","format":"json","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9738,"outputTokens":136,"latencyMs":2310.9124590000138},{"questionId":"q58","format":"toon","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":6012,"outputTokens":392,"latencyMs":5970.874375000014},{"questionId":"q58","format":"csv","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":6780,"outputTokens":264,"latencyMs":4393.402040999994},{"questionId":"q58","format":"xml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":11036,"outputTokens":200,"latencyMs":3243.2633340000175},{"questionId":"q58","format":"yaml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7372,"outputTokens":200,"latencyMs":2769.2004580000066},{"questionId":"q59","format":"json","model":"gpt-5-nano","expected":"Dr. Courtney Satterfield","actual":"Dr. Courtney Satterfield","isCorrect":true,"inputTokens":9739,"outputTokens":76,"latencyMs":2105.4266669999924},{"questionId":"q59","format":"toon","model":"gpt-5-nano","expected":"Dr. Courtney Satterfield","actual":"Dr. Courtney Satterfield","isCorrect":true,"inputTokens":6013,"outputTokens":140,"latencyMs":2265.2000830000034},{"questionId":"q59","format":"csv","model":"gpt-5-nano","expected":"Dr. Courtney Satterfield","actual":"Dr. Courtney Satterfield","isCorrect":true,"inputTokens":6781,"outputTokens":140,"latencyMs":2353.731375000003},{"questionId":"q59","format":"xml","model":"gpt-5-nano","expected":"Dr. Courtney Satterfield","actual":"Dr. Courtney Satterfield","isCorrect":true,"inputTokens":11037,"outputTokens":140,"latencyMs":2614.9170830000076},{"questionId":"q59","format":"yaml","model":"gpt-5-nano","expected":"Dr. Courtney Satterfield","actual":"Dr. Courtney Satterfield","isCorrect":true,"inputTokens":7373,"outputTokens":140,"latencyMs":3472.885209},{"questionId":"q60","format":"json","model":"gpt-5-nano","expected":"lukas71@gmail.com","actual":"lukas71@gmail.com","isCorrect":true,"inputTokens":9739,"outputTokens":139,"latencyMs":2373.651208999974},{"questionId":"q60","format":"toon","model":"gpt-5-nano","expected":"lukas71@gmail.com","actual":"lukas71@gmail.com","isCorrect":true,"inputTokens":6013,"outputTokens":139,"latencyMs":2132.121083999984},{"questionId":"q60","format":"csv","model":"gpt-5-nano","expected":"lukas71@gmail.com","actual":"lukas71@gmail.com","isCorrect":true,"inputTokens":6781,"outputTokens":267,"latencyMs":3185.6174170000013},{"questionId":"q60","format":"xml","model":"gpt-5-nano","expected":"lukas71@gmail.com","actual":"lukas71@gmail.com","isCorrect":true,"inputTokens":11037,"outputTokens":203,"latencyMs":3214.6773329999996},{"questionId":"q60","format":"yaml","model":"gpt-5-nano","expected":"lukas71@gmail.com","actual":"lukas71@gmail.com","isCorrect":true,"inputTokens":7373,"outputTokens":139,"latencyMs":1703.899000000005},{"questionId":"q61","format":"json","model":"gpt-5-nano","expected":"2025-08-05","actual":"2025-08-05","isCorrect":true,"inputTokens":9739,"outputTokens":204,"latencyMs":3408.625457999995},{"questionId":"q61","format":"toon","model":"gpt-5-nano","expected":"2025-08-05","actual":"2025-08-05","isCorrect":true,"inputTokens":6013,"outputTokens":76,"latencyMs":1742.614750000008},{"questionId":"q61","format":"csv","model":"gpt-5-nano","expected":"2025-08-05","actual":"2025-08-05","isCorrect":true,"inputTokens":6781,"outputTokens":268,"latencyMs":4062.0994579999824},{"questionId":"q61","format":"xml","model":"gpt-5-nano","expected":"2025-08-05","actual":"2025-08-05","isCorrect":true,"inputTokens":11037,"outputTokens":332,"latencyMs":4329.766915999993},{"questionId":"q61","format":"yaml","model":"gpt-5-nano","expected":"2025-08-05","actual":"2025-08-05","isCorrect":true,"inputTokens":7373,"outputTokens":140,"latencyMs":2656.797082999983},{"questionId":"q62","format":"json","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":9738,"outputTokens":327,"latencyMs":4221.204874999996},{"questionId":"q62","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6012,"outputTokens":711,"latencyMs":7848.512791999994},{"questionId":"q62","format":"csv","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6780,"outputTokens":903,"latencyMs":8287.347917000006},{"questionId":"q62","format":"xml","model":"gpt-5-nano","expected":"3","actual":"10","isCorrect":false,"inputTokens":11036,"outputTokens":647,"latencyMs":6944.630499999999},{"questionId":"q62","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"10","isCorrect":false,"inputTokens":7372,"outputTokens":327,"latencyMs":3122.6620419999817},{"questionId":"q63","format":"json","model":"gpt-5-nano","expected":"Maxine Zemlak","actual":"Maxine Zemlak","isCorrect":true,"inputTokens":9739,"outputTokens":138,"latencyMs":4663.652958999999},{"questionId":"q63","format":"toon","model":"gpt-5-nano","expected":"Maxine Zemlak","actual":"Maxine Zemlak","isCorrect":true,"inputTokens":6013,"outputTokens":138,"latencyMs":3369.3136670000094},{"questionId":"q63","format":"csv","model":"gpt-5-nano","expected":"Maxine Zemlak","actual":"Maxine Zemlak","isCorrect":true,"inputTokens":6781,"outputTokens":266,"latencyMs":3798.8209999999963},{"questionId":"q63","format":"xml","model":"gpt-5-nano","expected":"Maxine Zemlak","actual":"Maxine Zemlak","isCorrect":true,"inputTokens":11037,"outputTokens":202,"latencyMs":3454.3941669999913},{"questionId":"q63","format":"yaml","model":"gpt-5-nano","expected":"Maxine Zemlak","actual":"Maxine Zemlak","isCorrect":true,"inputTokens":7373,"outputTokens":138,"latencyMs":4146.146832999977},{"questionId":"q64","format":"json","model":"gpt-5-nano","expected":"brenden2@hotmail.com","actual":"brenden2@hotmail.com","isCorrect":true,"inputTokens":9739,"outputTokens":267,"latencyMs":5647.55133300001},{"questionId":"q64","format":"toon","model":"gpt-5-nano","expected":"brenden2@hotmail.com","actual":"brenden2@hotmail.com","isCorrect":true,"inputTokens":6013,"outputTokens":203,"latencyMs":3010.75991600001},{"questionId":"q64","format":"csv","model":"gpt-5-nano","expected":"brenden2@hotmail.com","actual":"brenden2@hotmail.com","isCorrect":true,"inputTokens":6781,"outputTokens":267,"latencyMs":2115.998583000008},{"questionId":"q64","format":"xml","model":"gpt-5-nano","expected":"brenden2@hotmail.com","actual":"brenden2@hotmail.com","isCorrect":true,"inputTokens":11037,"outputTokens":331,"latencyMs":4380.475833000004},{"questionId":"q64","format":"yaml","model":"gpt-5-nano","expected":"brenden2@hotmail.com","actual":"brenden2@hotmail.com","isCorrect":true,"inputTokens":7373,"outputTokens":139,"latencyMs":2166.7608330000076},{"questionId":"q65","format":"json","model":"gpt-5-nano","expected":"2025-08-29","actual":"2025-08-29","isCorrect":true,"inputTokens":9739,"outputTokens":332,"latencyMs":3944.2122079999826},{"questionId":"q65","format":"toon","model":"gpt-5-nano","expected":"2025-08-29","actual":"2025-08-29","isCorrect":true,"inputTokens":6013,"outputTokens":268,"latencyMs":3732.385457999975},{"questionId":"q65","format":"csv","model":"gpt-5-nano","expected":"2025-08-29","actual":"2025-08-29","isCorrect":true,"inputTokens":6781,"outputTokens":396,"latencyMs":2841.6518329999817},{"questionId":"q65","format":"xml","model":"gpt-5-nano","expected":"2025-08-29","actual":"2025-08-29","isCorrect":true,"inputTokens":11037,"outputTokens":140,"latencyMs":2206.0024580000027},{"questionId":"q65","format":"yaml","model":"gpt-5-nano","expected":"2025-08-29","actual":"2025-08-29","isCorrect":true,"inputTokens":7373,"outputTokens":140,"latencyMs":2291.364208999992},{"questionId":"q66","format":"json","model":"gpt-5-nano","expected":"4","actual":"2","isCorrect":false,"inputTokens":9738,"outputTokens":519,"latencyMs":4125.47641599999},{"questionId":"q66","format":"toon","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6012,"outputTokens":711,"latencyMs":8088.974500000011},{"questionId":"q66","format":"csv","model":"gpt-5-nano","expected":"4","actual":"11","isCorrect":false,"inputTokens":6780,"outputTokens":519,"latencyMs":6321.9130000000005},{"questionId":"q66","format":"xml","model":"gpt-5-nano","expected":"4","actual":"11","isCorrect":false,"inputTokens":11036,"outputTokens":583,"latencyMs":4200.701750000007},{"questionId":"q66","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":7372,"outputTokens":583,"latencyMs":6297.599625000003},{"questionId":"q67","format":"json","model":"gpt-5-nano","expected":"Claudia Cruickshank DVM","actual":"Claudia Cruickshank DVM","isCorrect":true,"inputTokens":9739,"outputTokens":143,"latencyMs":2912.834124999994},{"questionId":"q67","format":"toon","model":"gpt-5-nano","expected":"Claudia Cruickshank DVM","actual":"Claudia Cruickshank DVM","isCorrect":true,"inputTokens":6013,"outputTokens":143,"latencyMs":2421.850584},{"questionId":"q67","format":"csv","model":"gpt-5-nano","expected":"Claudia Cruickshank DVM","actual":"Claudia Cruickshank DVM","isCorrect":true,"inputTokens":6781,"outputTokens":207,"latencyMs":3116.5822500000068},{"questionId":"q67","format":"xml","model":"gpt-5-nano","expected":"Claudia Cruickshank DVM","actual":"Claudia Cruickshank DVM","isCorrect":true,"inputTokens":11037,"outputTokens":207,"latencyMs":3218.9356249999837},{"questionId":"q67","format":"yaml","model":"gpt-5-nano","expected":"Claudia Cruickshank DVM","actual":"Claudia Cruickshank DVM","isCorrect":true,"inputTokens":7373,"outputTokens":335,"latencyMs":5959.828000000009},{"questionId":"q68","format":"json","model":"gpt-5-nano","expected":"freeda.maggio74@gmail.com","actual":"freeda.maggio74@gmail.com","isCorrect":true,"inputTokens":9739,"outputTokens":205,"latencyMs":2961.5506250000035},{"questionId":"q68","format":"toon","model":"gpt-5-nano","expected":"freeda.maggio74@gmail.com","actual":"freeda.maggio74@gmail.com","isCorrect":true,"inputTokens":6013,"outputTokens":141,"latencyMs":2043.8920419999922},{"questionId":"q68","format":"csv","model":"gpt-5-nano","expected":"freeda.maggio74@gmail.com","actual":"freeda.maggio74@gmail.com","isCorrect":true,"inputTokens":6781,"outputTokens":333,"latencyMs":3585.4907080000266},{"questionId":"q68","format":"xml","model":"gpt-5-nano","expected":"freeda.maggio74@gmail.com","actual":"freeda.maggio74@gmail.com","isCorrect":true,"inputTokens":11037,"outputTokens":141,"latencyMs":3028.4967079999915},{"questionId":"q68","format":"yaml","model":"gpt-5-nano","expected":"freeda.maggio74@gmail.com","actual":"freeda.maggio74@gmail.com","isCorrect":true,"inputTokens":7373,"outputTokens":141,"latencyMs":2843.2516249999753},{"questionId":"q69","format":"json","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":9735,"outputTokens":903,"latencyMs":15920.834208999993},{"questionId":"q69","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6009,"outputTokens":583,"latencyMs":6311.494167000026},{"questionId":"q69","format":"csv","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6777,"outputTokens":1159,"latencyMs":11771.282832999976},{"questionId":"q69","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":11033,"outputTokens":647,"latencyMs":4768.233042000007},{"questionId":"q69","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7369,"outputTokens":711,"latencyMs":12148.621790999983},{"questionId":"q70","format":"json","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":9735,"outputTokens":647,"latencyMs":7048.331458000001},{"questionId":"q70","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6009,"outputTokens":519,"latencyMs":11328.925374999992},{"questionId":"q70","format":"csv","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6777,"outputTokens":583,"latencyMs":6098.344834000018},{"questionId":"q70","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":11033,"outputTokens":903,"latencyMs":9603.738207999995},{"questionId":"q70","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7369,"outputTokens":903,"latencyMs":14231.113124999974},{"questionId":"q71","format":"json","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":9736,"outputTokens":775,"latencyMs":7369.550875000015},{"questionId":"q71","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6010,"outputTokens":583,"latencyMs":6731.325707999989},{"questionId":"q71","format":"csv","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6778,"outputTokens":583,"latencyMs":7837.276500000007},{"questionId":"q71","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":11034,"outputTokens":519,"latencyMs":6240.008499999996},{"questionId":"q71","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7370,"outputTokens":583,"latencyMs":5608.396291000012},{"questionId":"q72","format":"json","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":9736,"outputTokens":775,"latencyMs":6498.46762499999},{"questionId":"q72","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6010,"outputTokens":455,"latencyMs":5201.593292000005},{"questionId":"q72","format":"csv","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6778,"outputTokens":839,"latencyMs":8005.897459},{"questionId":"q72","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":11034,"outputTokens":903,"latencyMs":8286.163332999975},{"questionId":"q72","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7370,"outputTokens":583,"latencyMs":3667.8866249999846},{"questionId":"q73","format":"json","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":9736,"outputTokens":711,"latencyMs":4776.384583000006},{"questionId":"q73","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6010,"outputTokens":711,"latencyMs":9609.80254199999},{"questionId":"q73","format":"csv","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6778,"outputTokens":583,"latencyMs":5845.595000000001},{"questionId":"q73","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":11034,"outputTokens":1031,"latencyMs":11357.896833000006},{"questionId":"q73","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7370,"outputTokens":519,"latencyMs":5951.586875000008},{"questionId":"q74","format":"json","model":"gpt-5-nano","expected":"42342.25","actual":"42342.25","isCorrect":true,"inputTokens":9736,"outputTokens":1802,"latencyMs":19755.418540999992},{"questionId":"q74","format":"toon","model":"gpt-5-nano","expected":"42342.25","actual":"42342.25","isCorrect":true,"inputTokens":6010,"outputTokens":3978,"latencyMs":33005.04820800002},{"questionId":"q74","format":"csv","model":"gpt-5-nano","expected":"42342.25","actual":"42342.25","isCorrect":true,"inputTokens":6778,"outputTokens":2826,"latencyMs":15668.504250000027},{"questionId":"q74","format":"xml","model":"gpt-5-nano","expected":"42342.25","actual":"42342.25","isCorrect":true,"inputTokens":11034,"outputTokens":2442,"latencyMs":21508.00350000002},{"questionId":"q74","format":"yaml","model":"gpt-5-nano","expected":"42342.25","actual":"49193.23","isCorrect":false,"inputTokens":7370,"outputTokens":2378,"latencyMs":22421.654666999995},{"questionId":"q75","format":"json","model":"gpt-5-nano","expected":"846.85","actual":"846.85","isCorrect":true,"inputTokens":9734,"outputTokens":2569,"latencyMs":14351.128457999992},{"questionId":"q75","format":"toon","model":"gpt-5-nano","expected":"846.85","actual":"846.85","isCorrect":true,"inputTokens":6008,"outputTokens":2889,"latencyMs":26770.479124999983},{"questionId":"q75","format":"csv","model":"gpt-5-nano","expected":"846.85","actual":"846.85","isCorrect":true,"inputTokens":6776,"outputTokens":1865,"latencyMs":15913.093415999989},{"questionId":"q75","format":"xml","model":"gpt-5-nano","expected":"846.85","actual":"846.85","isCorrect":true,"inputTokens":11032,"outputTokens":1545,"latencyMs":13416.188874999993},{"questionId":"q75","format":"yaml","model":"gpt-5-nano","expected":"846.85","actual":"846.85","isCorrect":true,"inputTokens":7368,"outputTokens":2569,"latencyMs":23448.192584000004},{"questionId":"q76","format":"json","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":9735,"outputTokens":135,"latencyMs":5185.920958000002},{"questionId":"q76","format":"toon","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":6009,"outputTokens":71,"latencyMs":2111.6935419999936},{"questionId":"q76","format":"csv","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":6777,"outputTokens":71,"latencyMs":2160.7785420000146},{"questionId":"q76","format":"xml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":11033,"outputTokens":455,"latencyMs":4898.41833299998},{"questionId":"q76","format":"yaml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":7369,"outputTokens":263,"latencyMs":3025.5673750000133},{"questionId":"q77","format":"json","model":"gpt-5-nano","expected":"1936.06","actual":"1936.06","isCorrect":true,"inputTokens":9734,"outputTokens":1098,"latencyMs":11276.571957999986},{"questionId":"q77","format":"toon","model":"gpt-5-nano","expected":"1936.06","actual":"1936.06","isCorrect":true,"inputTokens":6008,"outputTokens":1034,"latencyMs":11671.425916999986},{"questionId":"q77","format":"csv","model":"gpt-5-nano","expected":"1936.06","actual":"1936.06","isCorrect":true,"inputTokens":6776,"outputTokens":842,"latencyMs":7802.907333999989},{"questionId":"q77","format":"xml","model":"gpt-5-nano","expected":"1936.06","actual":"1936.06","isCorrect":true,"inputTokens":11032,"outputTokens":1098,"latencyMs":7163.344249999995},{"questionId":"q77","format":"yaml","model":"gpt-5-nano","expected":"1936.06","actual":"1936.06","isCorrect":true,"inputTokens":7368,"outputTokens":1354,"latencyMs":13101.612166000006},{"questionId":"q78","format":"json","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":9738,"outputTokens":2759,"latencyMs":25064.072875000013},{"questionId":"q78","format":"toon","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":6012,"outputTokens":1351,"latencyMs":6848.968334000005},{"questionId":"q78","format":"csv","model":"gpt-5-nano","expected":"44","actual":"46","isCorrect":false,"inputTokens":6780,"outputTokens":1415,"latencyMs":14459.32925000001},{"questionId":"q78","format":"xml","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":11036,"outputTokens":1607,"latencyMs":16181.164584000013},{"questionId":"q78","format":"yaml","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":7372,"outputTokens":1671,"latencyMs":13176.961833000008},{"questionId":"q79","format":"json","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":9738,"outputTokens":1735,"latencyMs":16681.134500000015},{"questionId":"q79","format":"toon","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":6012,"outputTokens":1031,"latencyMs":9764.626374999993},{"questionId":"q79","format":"csv","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":6780,"outputTokens":1671,"latencyMs":15794.334374999977},{"questionId":"q79","format":"xml","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":11036,"outputTokens":967,"latencyMs":9426.200333999994},{"questionId":"q79","format":"yaml","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":7372,"outputTokens":1351,"latencyMs":13007.724125000008},{"questionId":"q80","format":"json","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":9738,"outputTokens":1863,"latencyMs":19127.65849999999},{"questionId":"q80","format":"toon","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":6012,"outputTokens":1543,"latencyMs":16356.698375000007},{"questionId":"q80","format":"csv","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":6780,"outputTokens":1543,"latencyMs":11483.868124999979},{"questionId":"q80","format":"xml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":11036,"outputTokens":1351,"latencyMs":15123.078042000008},{"questionId":"q80","format":"yaml","model":"gpt-5-nano","expected":"32","actual":"31","isCorrect":false,"inputTokens":7372,"outputTokens":1287,"latencyMs":6462.253124999988},{"questionId":"q81","format":"json","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":9742,"outputTokens":903,"latencyMs":8882.427333},{"questionId":"q81","format":"toon","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":6016,"outputTokens":711,"latencyMs":12324.765500000009},{"questionId":"q81","format":"csv","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":6784,"outputTokens":583,"latencyMs":6280.893833000009},{"questionId":"q81","format":"xml","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":11040,"outputTokens":1543,"latencyMs":15681.051708000014},{"questionId":"q81","format":"yaml","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":7376,"outputTokens":775,"latencyMs":7663.829792000004},{"questionId":"q82","format":"json","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":9742,"outputTokens":775,"latencyMs":6653.681707999989},{"questionId":"q82","format":"toon","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":6016,"outputTokens":775,"latencyMs":7786.641916000022},{"questionId":"q82","format":"csv","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":6784,"outputTokens":1159,"latencyMs":10789.47875000001},{"questionId":"q82","format":"xml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":11040,"outputTokens":1223,"latencyMs":8007.263500000001},{"questionId":"q82","format":"yaml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":7376,"outputTokens":711,"latencyMs":4364.750958999997},{"questionId":"q83","format":"json","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":9743,"outputTokens":711,"latencyMs":8292.946624999982},{"questionId":"q83","format":"toon","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":6017,"outputTokens":839,"latencyMs":4682.287000000011},{"questionId":"q83","format":"csv","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":6785,"outputTokens":775,"latencyMs":6971.318999999989},{"questionId":"q83","format":"xml","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":11041,"outputTokens":1287,"latencyMs":7167.556458000006},{"questionId":"q83","format":"yaml","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":7377,"outputTokens":1095,"latencyMs":10502.716707999993},{"questionId":"q84","format":"json","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":9743,"outputTokens":775,"latencyMs":8604.063166999986},{"questionId":"q84","format":"toon","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":6017,"outputTokens":839,"latencyMs":7962.534583000001},{"questionId":"q84","format":"csv","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":6785,"outputTokens":775,"latencyMs":7521.391000000003},{"questionId":"q84","format":"xml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":11041,"outputTokens":1479,"latencyMs":13763.949292000005},{"questionId":"q84","format":"yaml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":7377,"outputTokens":775,"latencyMs":7821.052334000007},{"questionId":"q85","format":"json","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":9743,"outputTokens":839,"latencyMs":9474.105582999997},{"questionId":"q85","format":"toon","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":6017,"outputTokens":775,"latencyMs":7121.427082999988},{"questionId":"q85","format":"csv","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":6785,"outputTokens":1031,"latencyMs":11699.078667000023},{"questionId":"q85","format":"xml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":11041,"outputTokens":1223,"latencyMs":13459.754665999993},{"questionId":"q85","format":"yaml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":7377,"outputTokens":583,"latencyMs":5726.723750000005},{"questionId":"q86","format":"json","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":9742,"outputTokens":839,"latencyMs":8881.83374999999},{"questionId":"q86","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":6016,"outputTokens":1095,"latencyMs":10383.40737499998},{"questionId":"q86","format":"csv","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":6784,"outputTokens":2247,"latencyMs":18668.413291999983},{"questionId":"q86","format":"xml","model":"gpt-5-nano","expected":"6","actual":"8","isCorrect":false,"inputTokens":11040,"outputTokens":2311,"latencyMs":18610.611999999994},{"questionId":"q86","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":7376,"outputTokens":1223,"latencyMs":11865.399291000009},{"questionId":"q87","format":"json","model":"gpt-5-nano","expected":"5","actual":"4","isCorrect":false,"inputTokens":9742,"outputTokens":1095,"latencyMs":6300.196458999999},{"questionId":"q87","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":6016,"outputTokens":647,"latencyMs":7462.632207999995},{"questionId":"q87","format":"csv","model":"gpt-5-nano","expected":"5","actual":"10","isCorrect":false,"inputTokens":6784,"outputTokens":1479,"latencyMs":14004.076541999995},{"questionId":"q87","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":11040,"outputTokens":2375,"latencyMs":14972.963541999983},{"questionId":"q87","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":7376,"outputTokens":1479,"latencyMs":10234.670041999983},{"questionId":"q88","format":"json","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":9743,"outputTokens":967,"latencyMs":5632.503333000001},{"questionId":"q88","format":"toon","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6017,"outputTokens":711,"latencyMs":6941.359792000003},{"questionId":"q88","format":"csv","model":"gpt-5-nano","expected":"4","actual":"10","isCorrect":false,"inputTokens":6785,"outputTokens":2311,"latencyMs":22497.30016600003},{"questionId":"q88","format":"xml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":11041,"outputTokens":1863,"latencyMs":16778.21416600002},{"questionId":"q88","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":7377,"outputTokens":1031,"latencyMs":9174.879209000006},{"questionId":"q89","format":"json","model":"gpt-5-nano","expected":"27","actual":"27","isCorrect":true,"inputTokens":9744,"outputTokens":2439,"latencyMs":15790.312208000018},{"questionId":"q89","format":"toon","model":"gpt-5-nano","expected":"27","actual":"27","isCorrect":true,"inputTokens":6018,"outputTokens":3527,"latencyMs":20750.796208999993},{"questionId":"q89","format":"csv","model":"gpt-5-nano","expected":"27","actual":"27","isCorrect":true,"inputTokens":6786,"outputTokens":6087,"latencyMs":32360.92216700001},{"questionId":"q89","format":"xml","model":"gpt-5-nano","expected":"27","actual":"39","isCorrect":false,"inputTokens":11042,"outputTokens":5127,"latencyMs":51062.804458},{"questionId":"q89","format":"yaml","model":"gpt-5-nano","expected":"27","actual":"27","isCorrect":true,"inputTokens":7378,"outputTokens":3463,"latencyMs":30908.412584000005},{"questionId":"q90","format":"json","model":"gpt-5-nano","expected":"27","actual":"27","isCorrect":true,"inputTokens":9744,"outputTokens":1671,"latencyMs":15720.804125000024},{"questionId":"q90","format":"toon","model":"gpt-5-nano","expected":"27","actual":"27","isCorrect":true,"inputTokens":6018,"outputTokens":1927,"latencyMs":13024.252500000031},{"questionId":"q90","format":"csv","model":"gpt-5-nano","expected":"27","actual":"37","isCorrect":false,"inputTokens":6786,"outputTokens":6471,"latencyMs":58691.74404199998},{"questionId":"q90","format":"xml","model":"gpt-5-nano","expected":"27","actual":"28","isCorrect":false,"inputTokens":11042,"outputTokens":5511,"latencyMs":48244.214707999985},{"questionId":"q90","format":"yaml","model":"gpt-5-nano","expected":"27","actual":"27","isCorrect":true,"inputTokens":7378,"outputTokens":3399,"latencyMs":21301.83387500001},{"questionId":"q91","format":"json","model":"gpt-5-nano","expected":"6975","actual":"6975","isCorrect":true,"inputTokens":3712,"outputTokens":72,"latencyMs":3106.625208000012},{"questionId":"q91","format":"toon","model":"gpt-5-nano","expected":"6975","actual":"6975","isCorrect":true,"inputTokens":1563,"outputTokens":136,"latencyMs":2173.4088749999937},{"questionId":"q91","format":"csv","model":"gpt-5-nano","expected":"6975","actual":"6975","isCorrect":true,"inputTokens":1441,"outputTokens":136,"latencyMs":2207.5250000000233},{"questionId":"q91","format":"xml","model":"gpt-5-nano","expected":"6975","actual":"6975","isCorrect":true,"inputTokens":4423,"outputTokens":136,"latencyMs":2563.4236670000246},{"questionId":"q91","format":"yaml","model":"gpt-5-nano","expected":"6975","actual":"6975","isCorrect":true,"inputTokens":2985,"outputTokens":72,"latencyMs":2004.1497499999823},{"questionId":"q92","format":"json","model":"gpt-5-nano","expected":"6686.23","actual":"6686.23","isCorrect":true,"inputTokens":3711,"outputTokens":138,"latencyMs":2035.1270420000073},{"questionId":"q92","format":"toon","model":"gpt-5-nano","expected":"6686.23","actual":"6686.23","isCorrect":true,"inputTokens":1562,"outputTokens":138,"latencyMs":4099.307708000008},{"questionId":"q92","format":"csv","model":"gpt-5-nano","expected":"6686.23","actual":"6686.23","isCorrect":true,"inputTokens":1440,"outputTokens":138,"latencyMs":4950.298874999979},{"questionId":"q92","format":"xml","model":"gpt-5-nano","expected":"6686.23","actual":"6686.23","isCorrect":true,"inputTokens":4422,"outputTokens":74,"latencyMs":2060.0328749999753},{"questionId":"q92","format":"yaml","model":"gpt-5-nano","expected":"6686.23","actual":"6686.23","isCorrect":true,"inputTokens":2984,"outputTokens":138,"latencyMs":8157.924708999984},{"questionId":"q93","format":"json","model":"gpt-5-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":3712,"outputTokens":71,"latencyMs":3266.4955840000184},{"questionId":"q93","format":"toon","model":"gpt-5-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":1563,"outputTokens":135,"latencyMs":2373.3992499999586},{"questionId":"q93","format":"csv","model":"gpt-5-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":1441,"outputTokens":71,"latencyMs":1805.440333000035},{"questionId":"q93","format":"xml","model":"gpt-5-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":4423,"outputTokens":135,"latencyMs":2269.6386250000214},{"questionId":"q93","format":"yaml","model":"gpt-5-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":2985,"outputTokens":71,"latencyMs":2004.672957999981},{"questionId":"q94","format":"json","model":"gpt-5-nano","expected":"377","actual":"377","isCorrect":true,"inputTokens":3712,"outputTokens":71,"latencyMs":1775.2346249999828},{"questionId":"q94","format":"toon","model":"gpt-5-nano","expected":"377","actual":"377","isCorrect":true,"inputTokens":1563,"outputTokens":455,"latencyMs":6650.344334000023},{"questionId":"q94","format":"csv","model":"gpt-5-nano","expected":"377","actual":"377","isCorrect":true,"inputTokens":1441,"outputTokens":71,"latencyMs":2128.058707999997},{"questionId":"q94","format":"xml","model":"gpt-5-nano","expected":"377","actual":"377","isCorrect":true,"inputTokens":4423,"outputTokens":199,"latencyMs":4896.449458000017},{"questionId":"q94","format":"yaml","model":"gpt-5-nano","expected":"377","actual":"377","isCorrect":true,"inputTokens":2985,"outputTokens":71,"latencyMs":1800.050500000012},{"questionId":"q95","format":"json","model":"gpt-5-nano","expected":"0.44","actual":"0.44","isCorrect":true,"inputTokens":3712,"outputTokens":201,"latencyMs":2280.200375000015},{"questionId":"q95","format":"toon","model":"gpt-5-nano","expected":"0.44","actual":"0.44","isCorrect":true,"inputTokens":1563,"outputTokens":201,"latencyMs":2640.3976660000044},{"questionId":"q95","format":"csv","model":"gpt-5-nano","expected":"0.44","actual":"0.44","isCorrect":true,"inputTokens":1441,"outputTokens":137,"latencyMs":2159.501833999995},{"questionId":"q95","format":"xml","model":"gpt-5-nano","expected":"0.44","actual":"0.44","isCorrect":true,"inputTokens":4423,"outputTokens":201,"latencyMs":2729.7381250000326},{"questionId":"q95","format":"yaml","model":"gpt-5-nano","expected":"0.44","actual":"0.44","isCorrect":true,"inputTokens":2985,"outputTokens":137,"latencyMs":2862.6320000000414},{"questionId":"q96","format":"json","model":"gpt-5-nano","expected":"7621","actual":"7621","isCorrect":true,"inputTokens":3712,"outputTokens":136,"latencyMs":1977.3999170000316},{"questionId":"q96","format":"toon","model":"gpt-5-nano","expected":"7621","actual":"7621","isCorrect":true,"inputTokens":1563,"outputTokens":136,"latencyMs":1777.6621659999946},{"questionId":"q96","format":"csv","model":"gpt-5-nano","expected":"7621","actual":"7621","isCorrect":true,"inputTokens":1441,"outputTokens":200,"latencyMs":2808.3442500000237},{"questionId":"q96","format":"xml","model":"gpt-5-nano","expected":"7621","actual":"7621","isCorrect":true,"inputTokens":4423,"outputTokens":136,"latencyMs":2431.7366250000196},{"questionId":"q96","format":"yaml","model":"gpt-5-nano","expected":"7621","actual":"7621","isCorrect":true,"inputTokens":2985,"outputTokens":264,"latencyMs":3476.1824170000036},{"questionId":"q97","format":"json","model":"gpt-5-nano","expected":"1827.12","actual":"1827.12","isCorrect":true,"inputTokens":3711,"outputTokens":266,"latencyMs":2816.1715420000255},{"questionId":"q97","format":"toon","model":"gpt-5-nano","expected":"1827.12","actual":"1827.12","isCorrect":true,"inputTokens":1562,"outputTokens":138,"latencyMs":4694.5291669999715},{"questionId":"q97","format":"csv","model":"gpt-5-nano","expected":"1827.12","actual":"1827.12","isCorrect":true,"inputTokens":1440,"outputTokens":74,"latencyMs":1895.272500000021},{"questionId":"q97","format":"xml","model":"gpt-5-nano","expected":"1827.12","actual":"1827.12","isCorrect":true,"inputTokens":4422,"outputTokens":74,"latencyMs":1696.6640840000473},{"questionId":"q97","format":"yaml","model":"gpt-5-nano","expected":"1827.12","actual":"1827.12","isCorrect":true,"inputTokens":2984,"outputTokens":138,"latencyMs":2057.5235840000096},{"questionId":"q98","format":"json","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":3712,"outputTokens":71,"latencyMs":2102.908333999978},{"questionId":"q98","format":"toon","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":1563,"outputTokens":135,"latencyMs":2015.2874580000062},{"questionId":"q98","format":"csv","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":1441,"outputTokens":391,"latencyMs":4864.857958999986},{"questionId":"q98","format":"xml","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":4423,"outputTokens":263,"latencyMs":2451.5397079999675},{"questionId":"q98","format":"yaml","model":"gpt-5-nano","expected":"44","actual":"44","isCorrect":true,"inputTokens":2985,"outputTokens":327,"latencyMs":3204.123082999955},{"questionId":"q99","format":"json","model":"gpt-5-nano","expected":"411","actual":"411","isCorrect":true,"inputTokens":3712,"outputTokens":327,"latencyMs":7051.611250000016},{"questionId":"q99","format":"toon","model":"gpt-5-nano","expected":"411","actual":"411","isCorrect":true,"inputTokens":1563,"outputTokens":199,"latencyMs":2742.971750000026},{"questionId":"q99","format":"csv","model":"gpt-5-nano","expected":"411","actual":"411","isCorrect":true,"inputTokens":1441,"outputTokens":455,"latencyMs":3407.819332999992},{"questionId":"q99","format":"xml","model":"gpt-5-nano","expected":"411","actual":"411","isCorrect":true,"inputTokens":4423,"outputTokens":135,"latencyMs":1760.8075839999947},{"questionId":"q99","format":"yaml","model":"gpt-5-nano","expected":"411","actual":"411","isCorrect":true,"inputTokens":2985,"outputTokens":199,"latencyMs":2489.05237499997},{"questionId":"q100","format":"json","model":"gpt-5-nano","expected":"0.48","actual":"0.48","isCorrect":true,"inputTokens":3712,"outputTokens":201,"latencyMs":2107.64816599997},{"questionId":"q100","format":"toon","model":"gpt-5-nano","expected":"0.48","actual":"0.48","isCorrect":true,"inputTokens":1563,"outputTokens":201,"latencyMs":2152.561207999999},{"questionId":"q100","format":"csv","model":"gpt-5-nano","expected":"0.48","actual":"0.48","isCorrect":true,"inputTokens":1441,"outputTokens":201,"latencyMs":2159.094207999995},{"questionId":"q100","format":"xml","model":"gpt-5-nano","expected":"0.48","actual":"0.48","isCorrect":true,"inputTokens":4423,"outputTokens":585,"latencyMs":8331.69312499999},{"questionId":"q100","format":"yaml","model":"gpt-5-nano","expected":"0.48","actual":"0.48","isCorrect":true,"inputTokens":2985,"outputTokens":137,"latencyMs":3471.0555000000168},{"questionId":"q101","format":"json","model":"gpt-5-nano","expected":"4696","actual":"4696","isCorrect":true,"inputTokens":3712,"outputTokens":136,"latencyMs":2086.7599580000388},{"questionId":"q101","format":"toon","model":"gpt-5-nano","expected":"4696","actual":"4696","isCorrect":true,"inputTokens":1563,"outputTokens":328,"latencyMs":4088.7678339999984},{"questionId":"q101","format":"csv","model":"gpt-5-nano","expected":"4696","actual":"4696","isCorrect":true,"inputTokens":1441,"outputTokens":136,"latencyMs":2125.830750000023},{"questionId":"q101","format":"xml","model":"gpt-5-nano","expected":"4696","actual":"4696","isCorrect":true,"inputTokens":4423,"outputTokens":200,"latencyMs":1937.344999999972},{"questionId":"q101","format":"yaml","model":"gpt-5-nano","expected":"4696","actual":"4696","isCorrect":true,"inputTokens":2985,"outputTokens":136,"latencyMs":1919.3835419999668},{"questionId":"q102","format":"json","model":"gpt-5-nano","expected":"4211.6","actual":"4211.6","isCorrect":true,"inputTokens":3711,"outputTokens":202,"latencyMs":1896.579999999958},{"questionId":"q102","format":"toon","model":"gpt-5-nano","expected":"4211.6","actual":"4211.6","isCorrect":true,"inputTokens":1562,"outputTokens":74,"latencyMs":1951.8673749999725},{"questionId":"q102","format":"csv","model":"gpt-5-nano","expected":"4211.6","actual":"4211.6","isCorrect":true,"inputTokens":1440,"outputTokens":138,"latencyMs":1431.4333340000012},{"questionId":"q102","format":"xml","model":"gpt-5-nano","expected":"4211.6","actual":"4211.6","isCorrect":true,"inputTokens":4422,"outputTokens":266,"latencyMs":2830.9484999999986},{"questionId":"q102","format":"yaml","model":"gpt-5-nano","expected":"4211.6","actual":"4211.6","isCorrect":true,"inputTokens":2984,"outputTokens":202,"latencyMs":2569.2954589999863},{"questionId":"q103","format":"json","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":3712,"outputTokens":135,"latencyMs":3055.4072909999522},{"questionId":"q103","format":"toon","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":1563,"outputTokens":327,"latencyMs":6063.461208000022},{"questionId":"q103","format":"csv","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":1441,"outputTokens":135,"latencyMs":1830.0050420000334},{"questionId":"q103","format":"xml","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":4423,"outputTokens":135,"latencyMs":2073.8175000000047},{"questionId":"q103","format":"yaml","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":2985,"outputTokens":135,"latencyMs":2024.4842910000007},{"questionId":"q104","format":"json","model":"gpt-5-nano","expected":"344498","actual":"344498","isCorrect":true,"inputTokens":3709,"outputTokens":1864,"latencyMs":20743.07024999999},{"questionId":"q104","format":"toon","model":"gpt-5-nano","expected":"344498","actual":"344498","isCorrect":true,"inputTokens":1560,"outputTokens":2376,"latencyMs":19158.497167000023},{"questionId":"q104","format":"csv","model":"gpt-5-nano","expected":"344498","actual":"344498","isCorrect":true,"inputTokens":1438,"outputTokens":4040,"latencyMs":33570.64941699995},{"questionId":"q104","format":"xml","model":"gpt-5-nano","expected":"344498","actual":"344498","isCorrect":true,"inputTokens":4420,"outputTokens":2120,"latencyMs":18319.398874999955},{"questionId":"q104","format":"yaml","model":"gpt-5-nano","expected":"344498","actual":"344498","isCorrect":true,"inputTokens":2982,"outputTokens":2504,"latencyMs":17486.27891699999},{"questionId":"q105","format":"json","model":"gpt-5-nano","expected":"312818.50","actual":"312818.50","isCorrect":true,"inputTokens":3707,"outputTokens":4042,"latencyMs":27715.301083000028},{"questionId":"q105","format":"toon","model":"gpt-5-nano","expected":"312818.50","actual":"312818.50","isCorrect":true,"inputTokens":1558,"outputTokens":2634,"latencyMs":17378.131166999985},{"questionId":"q105","format":"csv","model":"gpt-5-nano","expected":"312818.50","actual":"312718.50","isCorrect":false,"inputTokens":1436,"outputTokens":2954,"latencyMs":29288.556417000014},{"questionId":"q105","format":"xml","model":"gpt-5-nano","expected":"312818.50","actual":"312818.50","isCorrect":true,"inputTokens":4418,"outputTokens":7690,"latencyMs":46754.21683300001},{"questionId":"q105","format":"yaml","model":"gpt-5-nano","expected":"312818.50","actual":"312818.50","isCorrect":true,"inputTokens":2980,"outputTokens":3786,"latencyMs":33383.13175},{"questionId":"q106","format":"json","model":"gpt-5-nano","expected":"1811","actual":"1811","isCorrect":true,"inputTokens":3709,"outputTokens":1544,"latencyMs":13894.615500000014},{"questionId":"q106","format":"toon","model":"gpt-5-nano","expected":"1811","actual":"1811","isCorrect":true,"inputTokens":1560,"outputTokens":1928,"latencyMs":12648.721375000023},{"questionId":"q106","format":"csv","model":"gpt-5-nano","expected":"1811","actual":"1811","isCorrect":true,"inputTokens":1438,"outputTokens":1928,"latencyMs":18158.010540999996},{"questionId":"q106","format":"xml","model":"gpt-5-nano","expected":"1811","actual":"1811","isCorrect":true,"inputTokens":4420,"outputTokens":2568,"latencyMs":15732.940917},{"questionId":"q106","format":"yaml","model":"gpt-5-nano","expected":"1811","actual":"1811","isCorrect":true,"inputTokens":2982,"outputTokens":1288,"latencyMs":10955.163375000004},{"questionId":"q107","format":"json","model":"gpt-5-nano","expected":"5742","actual":"5741.63","isCorrect":false,"inputTokens":3708,"outputTokens":2826,"latencyMs":26201.144542000024},{"questionId":"q107","format":"toon","model":"gpt-5-nano","expected":"5742","actual":"5741.63","isCorrect":false,"inputTokens":1559,"outputTokens":3594,"latencyMs":35990.964875000005},{"questionId":"q107","format":"csv","model":"gpt-5-nano","expected":"5742","actual":"5741.63","isCorrect":true,"inputTokens":1437,"outputTokens":2890,"latencyMs":23745.996999999974},{"questionId":"q107","format":"xml","model":"gpt-5-nano","expected":"5742","actual":"5741.63","isCorrect":false,"inputTokens":4419,"outputTokens":2762,"latencyMs":23545.925084000046},{"questionId":"q107","format":"yaml","model":"gpt-5-nano","expected":"5742","actual":"5741.63","isCorrect":false,"inputTokens":2981,"outputTokens":2122,"latencyMs":18222.963749999995},{"questionId":"q108","format":"json","model":"gpt-5-nano","expected":"5213.64","actual":"5401.64","isCorrect":false,"inputTokens":3706,"outputTokens":1866,"latencyMs":10071.50433299999},{"questionId":"q108","format":"toon","model":"gpt-5-nano","expected":"5213.64","actual":"5213.64","isCorrect":true,"inputTokens":1557,"outputTokens":5066,"latencyMs":36936.507458999986},{"questionId":"q108","format":"csv","model":"gpt-5-nano","expected":"5213.64","actual":"5211.98","isCorrect":false,"inputTokens":1435,"outputTokens":3722,"latencyMs":31915.33920799999},{"questionId":"q108","format":"xml","model":"gpt-5-nano","expected":"5213.64","actual":"5213.64","isCorrect":true,"inputTokens":4417,"outputTokens":4042,"latencyMs":22632.840333},{"questionId":"q108","format":"yaml","model":"gpt-5-nano","expected":"5213.64","actual":"5213.64","isCorrect":true,"inputTokens":2979,"outputTokens":3850,"latencyMs":18988.701457999996},{"questionId":"q109","format":"json","model":"gpt-5-nano","expected":"30","actual":"30.18","isCorrect":false,"inputTokens":3708,"outputTokens":1353,"latencyMs":13138.985000000044},{"questionId":"q109","format":"toon","model":"gpt-5-nano","expected":"30","actual":"30.1833333333","isCorrect":false,"inputTokens":1559,"outputTokens":1996,"latencyMs":26100.575125000032},{"questionId":"q109","format":"csv","model":"gpt-5-nano","expected":"30","actual":"30.18","isCorrect":false,"inputTokens":1437,"outputTokens":2697,"latencyMs":24620.171333000006},{"questionId":"q109","format":"xml","model":"gpt-5-nano","expected":"30","actual":"30.18","isCorrect":false,"inputTokens":4419,"outputTokens":2825,"latencyMs":18780.89512500004},{"questionId":"q109","format":"yaml","model":"gpt-5-nano","expected":"30","actual":"30.1833333333","isCorrect":false,"inputTokens":2981,"outputTokens":3084,"latencyMs":30253.369750000013},{"questionId":"q110","format":"json","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3708,"outputTokens":391,"latencyMs":4351.089999999967},{"questionId":"q110","format":"toon","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1559,"outputTokens":327,"latencyMs":3603.5555000000168},{"questionId":"q110","format":"csv","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1437,"outputTokens":263,"latencyMs":3470.5262499999953},{"questionId":"q110","format":"xml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":4419,"outputTokens":263,"latencyMs":3301.9788749999716},{"questionId":"q110","format":"yaml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":2981,"outputTokens":391,"latencyMs":5403.282624999993},{"questionId":"q111","format":"json","model":"gpt-5-nano","expected":"7944","actual":"7944","isCorrect":true,"inputTokens":3711,"outputTokens":520,"latencyMs":3600.170083999983},{"questionId":"q111","format":"toon","model":"gpt-5-nano","expected":"7944","actual":"7944","isCorrect":true,"inputTokens":1562,"outputTokens":712,"latencyMs":7900.946333000029},{"questionId":"q111","format":"csv","model":"gpt-5-nano","expected":"7944","actual":"7944","isCorrect":true,"inputTokens":1440,"outputTokens":648,"latencyMs":7093.944542000012},{"questionId":"q111","format":"xml","model":"gpt-5-nano","expected":"7944","actual":"7944","isCorrect":true,"inputTokens":4422,"outputTokens":1352,"latencyMs":12142.23683400004},{"questionId":"q111","format":"yaml","model":"gpt-5-nano","expected":"7944","actual":"7944","isCorrect":true,"inputTokens":2984,"outputTokens":776,"latencyMs":9676.69750000001},{"questionId":"q112","format":"json","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":3709,"outputTokens":2823,"latencyMs":26626.55466700002},{"questionId":"q112","format":"toon","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":1560,"outputTokens":1479,"latencyMs":11620.979290999996},{"questionId":"q112","format":"csv","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":1438,"outputTokens":1799,"latencyMs":17816.583874999953},{"questionId":"q112","format":"xml","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":4420,"outputTokens":1991,"latencyMs":11608.117665999976},{"questionId":"q112","format":"yaml","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":2982,"outputTokens":1927,"latencyMs":17976.007583},{"questionId":"q113","format":"json","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":3709,"outputTokens":1351,"latencyMs":7504.992665999976},{"questionId":"q113","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":1560,"outputTokens":1287,"latencyMs":8089.16079200001},{"questionId":"q113","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":1438,"outputTokens":1159,"latencyMs":8368.544332999969},{"questionId":"q113","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4420,"outputTokens":1351,"latencyMs":9879.407125000027},{"questionId":"q113","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2982,"outputTokens":1415,"latencyMs":8203.717042000033},{"questionId":"q114","format":"json","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":3715,"outputTokens":2503,"latencyMs":14300.482250000001},{"questionId":"q114","format":"toon","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":1566,"outputTokens":1863,"latencyMs":19860.288916999998},{"questionId":"q114","format":"csv","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":1444,"outputTokens":3207,"latencyMs":17891.136750000005},{"questionId":"q114","format":"xml","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":4426,"outputTokens":2183,"latencyMs":23856.857374999963},{"questionId":"q114","format":"yaml","model":"gpt-5-nano","expected":"26","actual":"27","isCorrect":false,"inputTokens":2988,"outputTokens":1863,"latencyMs":15280.603833000001},{"questionId":"q115","format":"json","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":3715,"outputTokens":1287,"latencyMs":14521.147874999966},{"questionId":"q115","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":1566,"outputTokens":1671,"latencyMs":19639.551666999992},{"questionId":"q115","format":"csv","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":1444,"outputTokens":1415,"latencyMs":8054.100792000012},{"questionId":"q115","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":4426,"outputTokens":1799,"latencyMs":18204.095917000028},{"questionId":"q115","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":2988,"outputTokens":1415,"latencyMs":13753.654209},{"questionId":"q116","format":"json","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":3715,"outputTokens":1863,"latencyMs":11231.150665999972},{"questionId":"q116","format":"toon","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":1566,"outputTokens":2247,"latencyMs":16329.248583000037},{"questionId":"q116","format":"csv","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":1444,"outputTokens":1671,"latencyMs":15908.416999999958},{"questionId":"q116","format":"xml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":4426,"outputTokens":3015,"latencyMs":32053.260583999974},{"questionId":"q116","format":"yaml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":2988,"outputTokens":1991,"latencyMs":15593.033584000019},{"questionId":"q117","format":"json","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":3716,"outputTokens":2247,"latencyMs":22851.75224999996},{"questionId":"q117","format":"toon","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":1567,"outputTokens":2119,"latencyMs":20895.994542},{"questionId":"q117","format":"csv","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":1445,"outputTokens":2119,"latencyMs":13167.545125000004},{"questionId":"q117","format":"xml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":4427,"outputTokens":2247,"latencyMs":26842.44524999999},{"questionId":"q117","format":"yaml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":2989,"outputTokens":2119,"latencyMs":14630.024459000037},{"questionId":"q118","format":"json","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":3716,"outputTokens":2439,"latencyMs":13179.716833000013},{"questionId":"q118","format":"toon","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":1567,"outputTokens":2311,"latencyMs":12719.164832999988},{"questionId":"q118","format":"csv","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":1445,"outputTokens":2695,"latencyMs":13549.327916999988},{"questionId":"q118","format":"xml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":4427,"outputTokens":2183,"latencyMs":23100.062124999997},{"questionId":"q118","format":"yaml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":2989,"outputTokens":3719,"latencyMs":18481.843249999976},{"questionId":"q119","format":"json","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":3716,"outputTokens":1863,"latencyMs":20471.790625000023},{"questionId":"q119","format":"toon","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":1567,"outputTokens":2887,"latencyMs":14944.100292000046},{"questionId":"q119","format":"csv","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":1445,"outputTokens":1863,"latencyMs":11682.023917000042},{"questionId":"q119","format":"xml","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":4427,"outputTokens":4615,"latencyMs":26417.39070799999},{"questionId":"q119","format":"yaml","model":"gpt-5-nano","expected":"26","actual":"26","isCorrect":true,"inputTokens":2989,"outputTokens":3719,"latencyMs":32432.48529099999},{"questionId":"q120","format":"json","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":3716,"outputTokens":1927,"latencyMs":11993.960625000007},{"questionId":"q120","format":"toon","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":1567,"outputTokens":1479,"latencyMs":19029.13149999996},{"questionId":"q120","format":"csv","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":1445,"outputTokens":2119,"latencyMs":18643.346375000023},{"questionId":"q120","format":"xml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":4427,"outputTokens":3399,"latencyMs":25141.51587500004},{"questionId":"q120","format":"yaml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":2989,"outputTokens":3527,"latencyMs":29699.743499999982},{"questionId":"q121","format":"json","model":"gpt-5-nano","expected":"35","actual":"35","isCorrect":true,"inputTokens":3714,"outputTokens":3271,"latencyMs":21694.63841699995},{"questionId":"q121","format":"toon","model":"gpt-5-nano","expected":"35","actual":"35","isCorrect":true,"inputTokens":1565,"outputTokens":2183,"latencyMs":15088.209875},{"questionId":"q121","format":"csv","model":"gpt-5-nano","expected":"35","actual":"35","isCorrect":true,"inputTokens":1443,"outputTokens":2119,"latencyMs":22566.850584},{"questionId":"q121","format":"xml","model":"gpt-5-nano","expected":"35","actual":"36","isCorrect":false,"inputTokens":4425,"outputTokens":2055,"latencyMs":14981.830625000002},{"questionId":"q121","format":"yaml","model":"gpt-5-nano","expected":"35","actual":"35","isCorrect":true,"inputTokens":2987,"outputTokens":2631,"latencyMs":15605.16954100004},{"questionId":"q122","format":"json","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":3714,"outputTokens":1607,"latencyMs":10281.626707999967},{"questionId":"q122","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":1565,"outputTokens":1799,"latencyMs":10508.287624999997},{"questionId":"q122","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":1443,"outputTokens":1543,"latencyMs":13406.946959000023},{"questionId":"q122","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":4425,"outputTokens":1735,"latencyMs":19976.58150000003},{"questionId":"q122","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2987,"outputTokens":1223,"latencyMs":7062.52566699998},{"questionId":"q123","format":"json","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":3718,"outputTokens":2439,"latencyMs":22098.910083999974},{"questionId":"q123","format":"toon","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1569,"outputTokens":2119,"latencyMs":24676.80033300002},{"questionId":"q123","format":"csv","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1447,"outputTokens":4615,"latencyMs":47459.09729100001},{"questionId":"q123","format":"xml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":4429,"outputTokens":2951,"latencyMs":28576.63483300002},{"questionId":"q123","format":"yaml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":2991,"outputTokens":3463,"latencyMs":22446.219542000035},{"questionId":"q124","format":"json","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":3718,"outputTokens":2503,"latencyMs":23901.167791999993},{"questionId":"q124","format":"toon","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1569,"outputTokens":2503,"latencyMs":17261.147666999954},{"questionId":"q124","format":"csv","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1447,"outputTokens":3143,"latencyMs":21859.215417},{"questionId":"q124","format":"xml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":4429,"outputTokens":2439,"latencyMs":25891.808375000022},{"questionId":"q124","format":"yaml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":2991,"outputTokens":2311,"latencyMs":14862.04933400004},{"questionId":"q125","format":"json","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":15189,"outputTokens":264,"latencyMs":6322.1847919999855},{"questionId":"q125","format":"toon","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8790,"outputTokens":456,"latencyMs":8064.342333999986},{"questionId":"q125","format":"csv","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8558,"outputTokens":136,"latencyMs":2674.229042000021},{"questionId":"q125","format":"xml","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":17140,"outputTokens":200,"latencyMs":2699.6745830000145},{"questionId":"q125","format":"yaml","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":13173,"outputTokens":72,"latencyMs":2387.1922920000507},{"questionId":"q126","format":"json","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":15191,"outputTokens":264,"latencyMs":10286.163166999992},{"questionId":"q126","format":"toon","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8792,"outputTokens":264,"latencyMs":3193.7972500000033},{"questionId":"q126","format":"csv","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8560,"outputTokens":392,"latencyMs":4903.814499999979},{"questionId":"q126","format":"xml","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17142,"outputTokens":328,"latencyMs":7727.762624999974},{"questionId":"q126","format":"yaml","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":13175,"outputTokens":264,"latencyMs":3304.836208999972},{"questionId":"q127","format":"json","model":"gpt-5-nano","expected":"vuejs","actual":"vuejs","isCorrect":true,"inputTokens":15186,"outputTokens":264,"latencyMs":4128.396166999999},{"questionId":"q127","format":"toon","model":"gpt-5-nano","expected":"vuejs","actual":"vuejs","isCorrect":true,"inputTokens":8787,"outputTokens":200,"latencyMs":3041.617041999998},{"questionId":"q127","format":"csv","model":"gpt-5-nano","expected":"vuejs","actual":"vuejs","isCorrect":true,"inputTokens":8555,"outputTokens":264,"latencyMs":3009.996334000025},{"questionId":"q127","format":"xml","model":"gpt-5-nano","expected":"vuejs","actual":"vuejs","isCorrect":true,"inputTokens":17137,"outputTokens":264,"latencyMs":4328.163291999954},{"questionId":"q127","format":"yaml","model":"gpt-5-nano","expected":"vuejs","actual":"vuejs","isCorrect":true,"inputTokens":13170,"outputTokens":200,"latencyMs":3640.219541999977},{"questionId":"q128","format":"json","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":15192,"outputTokens":199,"latencyMs":3064.0731659999583},{"questionId":"q128","format":"toon","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":8793,"outputTokens":199,"latencyMs":2740.8643750000047},{"questionId":"q128","format":"csv","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":8561,"outputTokens":199,"latencyMs":2650.438375000027},{"questionId":"q128","format":"xml","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":17143,"outputTokens":263,"latencyMs":3573.017332999967},{"questionId":"q128","format":"yaml","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":13176,"outputTokens":263,"latencyMs":12294.340290999971},{"questionId":"q129","format":"json","model":"gpt-5-nano","expected":"3367","actual":"3367","isCorrect":true,"inputTokens":15186,"outputTokens":264,"latencyMs":4325.090625000012},{"questionId":"q129","format":"toon","model":"gpt-5-nano","expected":"3367","actual":"3367","isCorrect":true,"inputTokens":8787,"outputTokens":328,"latencyMs":3983.594792000018},{"questionId":"q129","format":"csv","model":"gpt-5-nano","expected":"3367","actual":"3367","isCorrect":true,"inputTokens":8555,"outputTokens":328,"latencyMs":5348.565249999985},{"questionId":"q129","format":"xml","model":"gpt-5-nano","expected":"3367","actual":"3367","isCorrect":true,"inputTokens":17137,"outputTokens":264,"latencyMs":5281.116500000004},{"questionId":"q129","format":"yaml","model":"gpt-5-nano","expected":"3367","actual":"3367","isCorrect":true,"inputTokens":13170,"outputTokens":200,"latencyMs":4126.400749999972},{"questionId":"q130","format":"json","model":"gpt-5-nano","expected":"152300","actual":"152300","isCorrect":true,"inputTokens":15191,"outputTokens":264,"latencyMs":6962.222958999977},{"questionId":"q130","format":"toon","model":"gpt-5-nano","expected":"152300","actual":"152300","isCorrect":true,"inputTokens":8792,"outputTokens":392,"latencyMs":3621.357874999987},{"questionId":"q130","format":"csv","model":"gpt-5-nano","expected":"152300","actual":"152300","isCorrect":true,"inputTokens":8560,"outputTokens":648,"latencyMs":4996.8137080000015},{"questionId":"q130","format":"xml","model":"gpt-5-nano","expected":"152300","actual":"152300","isCorrect":true,"inputTokens":17142,"outputTokens":264,"latencyMs":6185.885999999999},{"questionId":"q130","format":"yaml","model":"gpt-5-nano","expected":"152300","actual":"152300","isCorrect":true,"inputTokens":13175,"outputTokens":200,"latencyMs":3915.9683339999756},{"questionId":"q131","format":"json","model":"gpt-5-nano","expected":"10668","actual":"10668","isCorrect":true,"inputTokens":15190,"outputTokens":264,"latencyMs":3767.001791000017},{"questionId":"q131","format":"toon","model":"gpt-5-nano","expected":"10668","actual":"10668","isCorrect":true,"inputTokens":8791,"outputTokens":392,"latencyMs":3681.7863329999964},{"questionId":"q131","format":"csv","model":"gpt-5-nano","expected":"10668","actual":"10668","isCorrect":true,"inputTokens":8559,"outputTokens":200,"latencyMs":3517.4621669999906},{"questionId":"q131","format":"xml","model":"gpt-5-nano","expected":"10668","actual":"10668","isCorrect":true,"inputTokens":17141,"outputTokens":264,"latencyMs":3491.596833000018},{"questionId":"q131","format":"yaml","model":"gpt-5-nano","expected":"10668","actual":"10668","isCorrect":true,"inputTokens":13174,"outputTokens":200,"latencyMs":3151.8418749999837},{"questionId":"q132","format":"json","model":"gpt-5-nano","expected":"microsoft","actual":"microsoft","isCorrect":true,"inputTokens":15188,"outputTokens":136,"latencyMs":3227.4194580000476},{"questionId":"q132","format":"toon","model":"gpt-5-nano","expected":"microsoft","actual":"microsoft","isCorrect":true,"inputTokens":8789,"outputTokens":328,"latencyMs":4768.578541999974},{"questionId":"q132","format":"csv","model":"gpt-5-nano","expected":"microsoft","actual":"microsoft","isCorrect":true,"inputTokens":8557,"outputTokens":200,"latencyMs":3007.484666000004},{"questionId":"q132","format":"xml","model":"gpt-5-nano","expected":"microsoft","actual":"microsoft","isCorrect":true,"inputTokens":17139,"outputTokens":136,"latencyMs":3115.9872920000344},{"questionId":"q132","format":"yaml","model":"gpt-5-nano","expected":"microsoft","actual":"microsoft","isCorrect":true,"inputTokens":13172,"outputTokens":200,"latencyMs":3243.9305000000168},{"questionId":"q133","format":"json","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":15194,"outputTokens":263,"latencyMs":3193.990583000006},{"questionId":"q133","format":"toon","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":8795,"outputTokens":327,"latencyMs":4803.676875000005},{"questionId":"q133","format":"csv","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":8563,"outputTokens":135,"latencyMs":3888.2197910000104},{"questionId":"q133","format":"xml","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":17145,"outputTokens":327,"latencyMs":4009.9150000000373},{"questionId":"q133","format":"yaml","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":13178,"outputTokens":135,"latencyMs":2496.18658400001},{"questionId":"q134","format":"json","model":"gpt-5-nano","expected":"2518","actual":"2518","isCorrect":true,"inputTokens":15189,"outputTokens":264,"latencyMs":3411.0290000000386},{"questionId":"q134","format":"toon","model":"gpt-5-nano","expected":"2518","actual":"2518","isCorrect":true,"inputTokens":8790,"outputTokens":392,"latencyMs":3882.483082999999},{"questionId":"q134","format":"csv","model":"gpt-5-nano","expected":"2518","actual":"2518","isCorrect":true,"inputTokens":8558,"outputTokens":392,"latencyMs":7564.845540999959},{"questionId":"q134","format":"xml","model":"gpt-5-nano","expected":"2518","actual":"2518","isCorrect":true,"inputTokens":17140,"outputTokens":328,"latencyMs":4407.723416999972},{"questionId":"q134","format":"yaml","model":"gpt-5-nano","expected":"2518","actual":"2518","isCorrect":true,"inputTokens":13173,"outputTokens":200,"latencyMs":2888.3398330000346},{"questionId":"q135","format":"json","model":"gpt-5-nano","expected":"103358","actual":"103358","isCorrect":true,"inputTokens":15194,"outputTokens":264,"latencyMs":2919.5192080000415},{"questionId":"q135","format":"toon","model":"gpt-5-nano","expected":"103358","actual":"103358","isCorrect":true,"inputTokens":8795,"outputTokens":904,"latencyMs":7213.861749999982},{"questionId":"q135","format":"csv","model":"gpt-5-nano","expected":"103358","actual":"103358","isCorrect":true,"inputTokens":8563,"outputTokens":392,"latencyMs":6935.310915999988},{"questionId":"q135","format":"xml","model":"gpt-5-nano","expected":"103358","actual":"103358","isCorrect":true,"inputTokens":17145,"outputTokens":392,"latencyMs":11060.892332999967},{"questionId":"q135","format":"yaml","model":"gpt-5-nano","expected":"103358","actual":"103358","isCorrect":true,"inputTokens":13178,"outputTokens":264,"latencyMs":3446.359790999966},{"questionId":"q136","format":"json","model":"gpt-5-nano","expected":"15413563","actual":"13178919","isCorrect":false,"inputTokens":15188,"outputTokens":4297,"latencyMs":31770.805915999983},{"questionId":"q136","format":"toon","model":"gpt-5-nano","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":8789,"outputTokens":13705,"latencyMs":144553.23929200007},{"questionId":"q136","format":"csv","model":"gpt-5-nano","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":8557,"outputTokens":4361,"latencyMs":37849.367791},{"questionId":"q136","format":"xml","model":"gpt-5-nano","expected":"15413563","actual":"11144871","isCorrect":false,"inputTokens":17139,"outputTokens":3529,"latencyMs":25391.98550000001},{"questionId":"q136","format":"yaml","model":"gpt-5-nano","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":13172,"outputTokens":4361,"latencyMs":30101.182541000016},{"questionId":"q137","format":"json","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":15185,"outputTokens":1351,"latencyMs":10567.556249999965},{"questionId":"q137","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8786,"outputTokens":199,"latencyMs":6884.175707999966},{"questionId":"q137","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8554,"outputTokens":1287,"latencyMs":10417.314499999979},{"questionId":"q137","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":17136,"outputTokens":1351,"latencyMs":11468.357833000016},{"questionId":"q137","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"90","isCorrect":false,"inputTokens":13169,"outputTokens":1287,"latencyMs":16153.891917},{"questionId":"q138","format":"json","model":"gpt-5-nano","expected":"154136","actual":"161831.35","isCorrect":false,"inputTokens":15187,"outputTokens":7114,"latencyMs":59323.559499999974},{"questionId":"q138","format":"toon","model":"gpt-5-nano","expected":"154136","actual":"154135.63","isCorrect":true,"inputTokens":8788,"outputTokens":10378,"latencyMs":80909.49466600001},{"questionId":"q138","format":"csv","model":"gpt-5-nano","expected":"154136","actual":"154135.63","isCorrect":true,"inputTokens":8556,"outputTokens":6154,"latencyMs":39523.52633299999},{"questionId":"q138","format":"xml","model":"gpt-5-nano","expected":"154136","actual":"176493.34","isCorrect":false,"inputTokens":17138,"outputTokens":4746,"latencyMs":33491.533999999985},{"questionId":"q138","format":"yaml","model":"gpt-5-nano","expected":"154136","actual":"148742.2972972973","isCorrect":false,"inputTokens":13171,"outputTokens":5645,"latencyMs":35528.64245799999},{"questionId":"q139","format":"json","model":"gpt-5-nano","expected":"77","actual":"65","isCorrect":false,"inputTokens":15188,"outputTokens":2375,"latencyMs":17078.73887499998},{"questionId":"q139","format":"toon","model":"gpt-5-nano","expected":"77","actual":"77","isCorrect":true,"inputTokens":8789,"outputTokens":4679,"latencyMs":43084.070791999984},{"questionId":"q139","format":"csv","model":"gpt-5-nano","expected":"77","actual":"77","isCorrect":true,"inputTokens":8557,"outputTokens":3079,"latencyMs":32706.273708999972},{"questionId":"q139","format":"xml","model":"gpt-5-nano","expected":"77","actual":"64","isCorrect":false,"inputTokens":17139,"outputTokens":5191,"latencyMs":33100.169166000036},{"questionId":"q139","format":"yaml","model":"gpt-5-nano","expected":"77","actual":"64","isCorrect":false,"inputTokens":15068,"outputTokens":7120,"latencyMs":64101.707042000024},{"questionId":"q140","format":"json","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":15188,"outputTokens":2759,"latencyMs":19231.35716700001},{"questionId":"q140","format":"toon","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":8789,"outputTokens":2055,"latencyMs":14256.53354100004},{"questionId":"q140","format":"csv","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":8557,"outputTokens":3399,"latencyMs":22249.686209000007},{"questionId":"q140","format":"xml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":17139,"outputTokens":2631,"latencyMs":17393.795042000012},{"questionId":"q140","format":"yaml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":13172,"outputTokens":2695,"latencyMs":29277.75575000001},{"questionId":"q141","format":"json","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":15188,"outputTokens":1991,"latencyMs":21917.801875000005},{"questionId":"q141","format":"toon","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":8789,"outputTokens":3015,"latencyMs":31398.578958},{"questionId":"q141","format":"csv","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":8557,"outputTokens":2247,"latencyMs":28072.916000000027},{"questionId":"q141","format":"xml","model":"gpt-5-nano","expected":"16","actual":"14","isCorrect":false,"inputTokens":17139,"outputTokens":1415,"latencyMs":10279.029707999958},{"questionId":"q141","format":"yaml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":13172,"outputTokens":2119,"latencyMs":13705.472291999962},{"questionId":"q142","format":"json","model":"gpt-5-nano","expected":"49","actual":"37","isCorrect":false,"inputTokens":15188,"outputTokens":2247,"latencyMs":17690.96520799998},{"questionId":"q142","format":"toon","model":"gpt-5-nano","expected":"49","actual":"41","isCorrect":false,"inputTokens":8789,"outputTokens":2247,"latencyMs":13964.716707999993},{"questionId":"q142","format":"csv","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":8557,"outputTokens":4359,"latencyMs":24992.431709000026},{"questionId":"q142","format":"xml","model":"gpt-5-nano","expected":"49","actual":"38","isCorrect":false,"inputTokens":17139,"outputTokens":1863,"latencyMs":16201.765582999971},{"questionId":"q142","format":"yaml","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":13172,"outputTokens":3463,"latencyMs":24024.559666000016},{"questionId":"q143","format":"json","model":"gpt-5-nano","expected":"23","actual":"22","isCorrect":false,"inputTokens":15188,"outputTokens":3271,"latencyMs":23265.205874999985},{"questionId":"q143","format":"toon","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":8789,"outputTokens":4807,"latencyMs":31048.19095899997},{"questionId":"q143","format":"csv","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":8557,"outputTokens":3911,"latencyMs":28614.549457999994},{"questionId":"q143","format":"xml","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":17139,"outputTokens":7367,"latencyMs":50101.432875},{"questionId":"q143","format":"yaml","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":13172,"outputTokens":6151,"latencyMs":37457.83145900001},{"questionId":"q144","format":"json","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":15188,"outputTokens":5639,"latencyMs":43804.95516700001},{"questionId":"q144","format":"toon","model":"gpt-5-nano","expected":"11","actual":"9","isCorrect":false,"inputTokens":8789,"outputTokens":2055,"latencyMs":14880.539583000005},{"questionId":"q144","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":8557,"outputTokens":3399,"latencyMs":35436.010875000036},{"questionId":"q144","format":"xml","model":"gpt-5-nano","expected":"11","actual":"10","isCorrect":false,"inputTokens":17139,"outputTokens":1863,"latencyMs":15287.124416999985},{"questionId":"q144","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":13172,"outputTokens":4359,"latencyMs":45950.23566699994},{"questionId":"q145","format":"json","model":"gpt-5-nano","expected":"19","actual":"19","isCorrect":true,"inputTokens":15188,"outputTokens":4551,"latencyMs":41888.297667000035},{"questionId":"q145","format":"toon","model":"gpt-5-nano","expected":"19","actual":"19","isCorrect":true,"inputTokens":8789,"outputTokens":10183,"latencyMs":62461.11354200001},{"questionId":"q145","format":"csv","model":"gpt-5-nano","expected":"19","actual":"19","isCorrect":true,"inputTokens":8557,"outputTokens":3143,"latencyMs":33699.921250000014},{"questionId":"q145","format":"xml","model":"gpt-5-nano","expected":"19","actual":"19","isCorrect":true,"inputTokens":17139,"outputTokens":8711,"latencyMs":75960.46604100004},{"questionId":"q145","format":"yaml","model":"gpt-5-nano","expected":"19","actual":"20","isCorrect":false,"inputTokens":13172,"outputTokens":6215,"latencyMs":40243.47724999994},{"questionId":"q146","format":"json","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":15188,"outputTokens":3527,"latencyMs":20281.348042000027},{"questionId":"q146","format":"toon","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":8789,"outputTokens":3655,"latencyMs":38010.426833000034},{"questionId":"q146","format":"csv","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":8557,"outputTokens":10183,"latencyMs":67830.84658300004},{"questionId":"q146","format":"xml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":17139,"outputTokens":2183,"latencyMs":14748.326416999975},{"questionId":"q146","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":13172,"outputTokens":3399,"latencyMs":31304.879791999992},{"questionId":"q147","format":"json","model":"gpt-5-nano","expected":"41","actual":"35","isCorrect":false,"inputTokens":15189,"outputTokens":2439,"latencyMs":19051.980291999993},{"questionId":"q147","format":"toon","model":"gpt-5-nano","expected":"41","actual":"37","isCorrect":false,"inputTokens":8790,"outputTokens":3527,"latencyMs":22412.309667000023},{"questionId":"q147","format":"csv","model":"gpt-5-nano","expected":"41","actual":"40","isCorrect":false,"inputTokens":8558,"outputTokens":3015,"latencyMs":30923.15724999999},{"questionId":"q147","format":"xml","model":"gpt-5-nano","expected":"41","actual":"54","isCorrect":false,"inputTokens":17140,"outputTokens":8519,"latencyMs":85140.38},{"questionId":"q147","format":"yaml","model":"gpt-5-nano","expected":"41","actual":"67","isCorrect":false,"inputTokens":13173,"outputTokens":10119,"latencyMs":54918.44337499997},{"questionId":"q148","format":"json","model":"gpt-5-nano","expected":"53","actual":"57","isCorrect":false,"inputTokens":15189,"outputTokens":2631,"latencyMs":14920.208584000007},{"questionId":"q148","format":"toon","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":8790,"outputTokens":2567,"latencyMs":15358.285791000002},{"questionId":"q148","format":"csv","model":"gpt-5-nano","expected":"53","actual":"63","isCorrect":false,"inputTokens":8558,"outputTokens":2567,"latencyMs":16075.23654199997},{"questionId":"q148","format":"xml","model":"gpt-5-nano","expected":"53","actual":"48","isCorrect":false,"inputTokens":17140,"outputTokens":5063,"latencyMs":27688.752749999985},{"questionId":"q148","format":"yaml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":13173,"outputTokens":6151,"latencyMs":42171.99545899994},{"questionId":"q149","format":"json","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":15195,"outputTokens":16391,"latencyMs":149480.339417},{"questionId":"q149","format":"toon","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":8796,"outputTokens":10503,"latencyMs":72016.71129100001},{"questionId":"q149","format":"csv","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":8564,"outputTokens":4807,"latencyMs":44379.204958999995},{"questionId":"q149","format":"xml","model":"gpt-5-nano","expected":"57","actual":"46","isCorrect":false,"inputTokens":17146,"outputTokens":9863,"latencyMs":54558.46879099996},{"questionId":"q149","format":"yaml","model":"gpt-5-nano","expected":"57","actual":"56","isCorrect":false,"inputTokens":13179,"outputTokens":12167,"latencyMs":66911.90370799997},{"questionId":"q150","format":"json","model":"gpt-5-nano","expected":"43","actual":"44","isCorrect":false,"inputTokens":15195,"outputTokens":3399,"latencyMs":21145.205332999933},{"questionId":"q150","format":"toon","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":8796,"outputTokens":7623,"latencyMs":47413.88270800002},{"questionId":"q150","format":"csv","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":8564,"outputTokens":3527,"latencyMs":19383.114291999955},{"questionId":"q150","format":"xml","model":"gpt-5-nano","expected":"43","actual":"37","isCorrect":false,"inputTokens":17146,"outputTokens":3655,"latencyMs":21911.847582999966},{"questionId":"q150","format":"yaml","model":"gpt-5-nano","expected":"43","actual":"40","isCorrect":false,"inputTokens":13179,"outputTokens":3015,"latencyMs":17307.11975000007},{"questionId":"q151","format":"json","model":"gpt-5-nano","expected":"25","actual":"30","isCorrect":false,"inputTokens":15195,"outputTokens":6471,"latencyMs":46284.17358299997},{"questionId":"q151","format":"toon","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":8796,"outputTokens":5127,"latencyMs":30523.704790999996},{"questionId":"q151","format":"csv","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":8564,"outputTokens":3975,"latencyMs":22902.60674999992},{"questionId":"q151","format":"xml","model":"gpt-5-nano","expected":"25","actual":"23","isCorrect":false,"inputTokens":17146,"outputTokens":7175,"latencyMs":40170.364291999955},{"questionId":"q151","format":"yaml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":13179,"outputTokens":3783,"latencyMs":24397.443499999936},{"questionId":"q152","format":"json","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":15195,"outputTokens":1927,"latencyMs":11596.635458000004},{"questionId":"q152","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8796,"outputTokens":3591,"latencyMs":34839.70533299993},{"questionId":"q152","format":"csv","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8564,"outputTokens":2631,"latencyMs":16677.432333000004},{"questionId":"q152","format":"xml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":17146,"outputTokens":1863,"latencyMs":15458.170166000025},{"questionId":"q152","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":13179,"outputTokens":1863,"latencyMs":11414.568250000011},{"questionId":"q153","format":"json","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":15195,"outputTokens":1543,"latencyMs":9937.089749999926},{"questionId":"q153","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8796,"outputTokens":5383,"latencyMs":29585.493583000032},{"questionId":"q153","format":"csv","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8564,"outputTokens":4679,"latencyMs":57854.73316600011},{"questionId":"q153","format":"xml","model":"gpt-5-nano","expected":"6","actual":"5","isCorrect":false,"inputTokens":17146,"outputTokens":3335,"latencyMs":24867.77470800001},{"questionId":"q153","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":13179,"outputTokens":2951,"latencyMs":18378.52354199998},{"questionId":"q154","format":"json","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":15195,"outputTokens":2375,"latencyMs":13409.657957999967},{"questionId":"q154","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":8796,"outputTokens":3527,"latencyMs":37057.010750000016},{"questionId":"q154","format":"csv","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":8564,"outputTokens":2695,"latencyMs":19144.693792000064},{"questionId":"q154","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":17146,"outputTokens":2375,"latencyMs":13506.978166999994},{"questionId":"q154","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":13179,"outputTokens":2055,"latencyMs":16356.974416999961}]