diff --git a/.gitignore b/.gitignore
index b186605..f73f2b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
dist
node_modules
.DS_Store
+.env
diff --git a/README.md b/README.md
index 546493c..271be53 100644
--- a/README.md
+++ b/README.md
@@ -42,168 +42,148 @@ users[2]{id,name,role}:
- 📐 **Indentation-based structure:** replaces braces with whitespace for better readability
- 🧺 **Tabular arrays:** declare keys once, then stream rows without repetition
-## Token Benchmarks
+## Benchmarks
-> [!NOTE]
-> Benchmarks for LLM accuracy and retrieval are currently in development.
+
-
+### Token Efficiency
-| Example | JSON | TOON | Tokens Saved | Reduction |
-| ------- | ---- | ---- | ------------ | --------- |
-| ๐ค Simple user object | 31 | 18 | 13 | **41.9%** |
-| ๐ท๏ธ User with tags | 48 | 28 | 20 | **41.7%** |
-| ๐ฆ Small product catalog | 117 | 49 | 68 | **58.1%** |
-| ๐ฅ API response with users | 123 | 53 | 70 | **56.9%** |
-| โ๏ธ Nested configuration | 68 | 42 | 26 | **38.2%** |
-| ๐ E-commerce order | 163 | 94 | 69 | **42.3%** |
-| ๐ Analytics data | 209 | 94 | 115 | **55.0%** |
-| ๐ Large dataset (50 records) | 2159 | 762 | 1397 | **64.7%** |
-| **Total** | **2918** | **1140** | **1778** | **60.9%** |
+```
+โญ GitHub Repositories โโโโโโโโโโโโโโโโโโโโโโโโโ 8,745 tokens (JSON: 15,145) ๐ฐ 42.3% saved
+๐ Analytics Time Series โโโโโโโโโโโโโโโโโโโโโโโโโ 3,631 tokens (JSON: 9,024) ๐ฐ 59.8% saved
+๐ฅ API Response โโโโโโโโโโโโโโโโโโโโโโโโโ 2,593 tokens (JSON: 4,589) ๐ฐ 43.5% saved
+๐ E-commerce Order โโโโโโโโโโโโโโโโโโโโโโโโโ 203 tokens (JSON: 338) ๐ฐ 39.9% saved
+```
+
+**Total:** 15,172 tokens (TOON) vs 29,096 tokens (JSON) → 47.9% savings
-View detailed results
+View detailed examples
-### ๐ฆ Small product catalog
+#### ⭐ GitHub Repositories
-**Savings: 68 tokens (58.1% reduction)**
+**Configuration:** Top 100 GitHub repositories with stars, forks, and metadata
-**JSON** (117 tokens):
+**Savings:** 6,400 tokens (42.3% reduction)
+
+**JSON** (15,145 tokens):
```json
{
- "items": [
+ "repositories": [
{
- "sku": "A1",
- "name": "Widget",
- "qty": 2,
- "price": 9.99
+ "id": 28457823,
+ "name": "freeCodeCamp",
+ "repo": "freeCodeCamp/freeCodeCamp",
+ "description": "freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,...",
+ "createdAt": "2014-12-24T17:49:19Z",
+ "updatedAt": "2025-10-27T07:40:58Z",
+ "pushedAt": "2025-10-26T11:31:08Z",
+ "stars": 430828,
+ "watchers": 8582,
+ "forks": 42136,
+ "defaultBranch": "main"
},
{
- "sku": "B2",
- "name": "Gadget",
- "qty": 1,
- "price": 14.5
+ "id": 132750724,
+ "name": "build-your-own-x",
+ "repo": "codecrafters-io/build-your-own-x",
+ "description": "Master programming by recreating your favorite technologies from scratch.",
+ "createdAt": "2018-05-09T12:03:18Z",
+ "updatedAt": "2025-10-27T07:43:25Z",
+ "pushedAt": "2025-10-10T18:45:01Z",
+ "stars": 430102,
+ "watchers": 6322,
+ "forks": 40388,
+ "defaultBranch": "master"
},
{
- "sku": "C3",
- "name": "Doohickey",
- "qty": 5,
- "price": 7.25
+ "id": 21737465,
+ "name": "awesome",
+ "repo": "sindresorhus/awesome",
+ "description": "😎 Awesome lists about all kinds of interesting topics",
+ "createdAt": "2014-07-11T13:42:37Z",
+ "updatedAt": "2025-10-27T07:44:27Z",
+ "pushedAt": "2025-10-23T17:26:53Z",
+ "stars": 409760,
+ "watchers": 8016,
+ "forks": 32015,
+ "defaultBranch": "main"
}
]
}
```
-**TOON** (49 tokens):
+**TOON** (8,745 tokens):
```
-items[3]{sku,name,qty,price}:
- A1,Widget,2,9.99
- B2,Gadget,1,14.5
- C3,Doohickey,5,7.25
+repositories[3]{id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch}:
+ 28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,...","2014-12-24T17:49:19Z","2025-10-27T07:40:58Z","2025-10-26T11:31:08Z",430828,8582,42136,main
+ 132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-27T07:43:25Z","2025-10-10T18:45:01Z",430102,6322,40388,master
+ 21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-27T07:44:27Z","2025-10-23T17:26:53Z",409760,8016,32015,main
```
---
-### ๐ฅ API response with users
+#### 📈 Analytics Time Series
-**Savings: 70 tokens (56.9% reduction)**
+**Configuration:** 180 days of web metrics (views, clicks, conversions, revenue)
-**JSON** (123 tokens):
+**Savings:** 5,393 tokens (59.8% reduction)
-```json
-{
- "users": [
- {
- "id": 1,
- "name": "Alice",
- "email": "alice@example.com",
- "active": true
- },
- {
- "id": 2,
- "name": "Bob",
- "email": "bob@example.com",
- "active": true
- },
- {
- "id": 3,
- "name": "Charlie",
- "email": "charlie@example.com",
- "active": false
- }
- ],
- "total": 3,
- "page": 1
-}
-```
-
-**TOON** (53 tokens):
-
-```
-users[3]{id,name,email,active}:
- 1,Alice,alice@example.com,true
- 2,Bob,bob@example.com,true
- 3,Charlie,charlie@example.com,false
-total: 3
-page: 1
-```
-
----
-
-### ๐ Analytics data
-
-**Savings: 115 tokens (55.0% reduction)**
-
-**JSON** (209 tokens):
+**JSON** (9,024 tokens):
```json
{
"metrics": [
+ {
+ "date": "2024-12-31",
+ "views": 3769,
+ "clicks": 400,
+ "conversions": 59,
+ "revenue": 198.98
+ },
{
"date": "2025-01-01",
- "views": 1234,
- "clicks": 89,
- "conversions": 12
+ "views": 5742,
+ "clicks": 463,
+ "conversions": 28,
+ "revenue": 295.77
},
{
"date": "2025-01-02",
- "views": 2345,
- "clicks": 156,
- "conversions": 23
+ "views": 3669,
+ "clicks": 336,
+ "conversions": 102,
+ "revenue": 624.23
},
{
"date": "2025-01-03",
- "views": 1890,
- "clicks": 123,
- "conversions": 18
+ "views": 1332,
+ "clicks": 304,
+ "conversions": 99,
+ "revenue": 113.06
},
{
"date": "2025-01-04",
- "views": 3456,
- "clicks": 234,
- "conversions": 34
- },
- {
- "date": "2025-01-05",
- "views": 2789,
- "clicks": 178,
- "conversions": 27
+ "views": 1444,
+ "clicks": 222,
+ "conversions": 88,
+ "revenue": 986.69
}
]
}
```
-**TOON** (94 tokens):
+**TOON** (3,631 tokens):
```
-metrics[5]{date,views,clicks,conversions}:
- 2025-01-01,1234,89,12
- 2025-01-02,2345,156,23
- 2025-01-03,1890,123,18
- 2025-01-04,3456,234,34
- 2025-01-05,2789,178,27
+metrics[5]{date,views,clicks,conversions,revenue}:
+ 2024-12-31,3769,400,59,198.98
+ 2025-01-01,5742,463,28,295.77
+ 2025-01-02,3669,336,102,624.23
+ 2025-01-03,1332,304,99,113.06
+ 2025-01-04,1444,222,88,986.69
```
@@ -213,6 +193,107 @@ metrics[5]{date,views,clicks,conversions}:
> [!NOTE]
> Measured with [`gpt-tokenizer`](https://github.com/niieani/gpt-tokenizer) using `o200k_base` encoding (used by GPT-5 and other modern models). Savings will vary across models and tokenizers.
+
+
+### Retrieval Accuracy
+
+Tested across **2 LLMs** with data retrieval tasks (accuracy of the best-performing format per model shown):
+
+```
+gpt-4o-mini โโโโโโโโโโโโโโโโโโโโ 72.3% accuracy
+claude-haiku-4-5 โโโโโโโโโโโโโโโโโโโโ 76.7% accuracy
+```
+
+**TOON achieves 73.9% accuracy (vs JSON's 73.6%) while using 46.3% fewer tokens.**
+
+| Format | Accuracy | Average Tokens |
+| ------ | -------- | -------------- |
+| `toon` | 73.9% | 4,678 |
+| `json` | 73.6% | 8,713 |
+| `markdown-kv` | 73.6% | 8,649 |
+| `csv` | 72.3% | 4,745 |
+| `yaml` | 71.7% | 7,091 |
+
+
+View detailed breakdown by dataset and model
+
+#### Performance by Dataset
+
+##### Uniform employee records (TOON optimal format)
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 72.4% | 2,483 | 84/116 |
+| `csv` | 69.0% | 2,337 | 80/116 |
+| `yaml` | 68.1% | 4,969 | 79/116 |
+| `markdown-kv` | 68.1% | 6,270 | 79/116 |
+| `json` | 68.1% | 6,347 | 79/116 |
+
+##### E-commerce orders with nested structures
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 84.1% | 5,967 | 74/88 |
+| `csv` | 83.0% | 6,735 | 73/88 |
+| `yaml` | 81.8% | 7,328 | 72/88 |
+| `markdown-kv` | 86.4% | 9,110 | 76/88 |
+| `json` | 84.1% | 9,694 | 74/88 |
+
+##### Time-series analytics data
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `csv` | 72.4% | 1,393 | 42/58 |
+| `toon` | 70.7% | 1,515 | 41/58 |
+| `yaml` | 72.4% | 2,938 | 42/58 |
+| `json` | 74.1% | 3,665 | 43/58 |
+| `markdown-kv` | 70.7% | 3,779 | 41/58 |
+
+##### Popular GitHub repositories
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 64.3% | 8,745 | 36/56 |
+| `csv` | 62.5% | 8,513 | 35/56 |
+| `json` | 67.9% | 15,145 | 38/56 |
+| `markdown-kv` | 67.9% | 15,436 | 38/56 |
+| `yaml` | 62.5% | 13,129 | 35/56 |
+
+
+#### Performance by Model
+
+##### gpt-4o-mini
+
+| Format | Accuracy | Correct/Total |
+|--------|----------|---------------|
+| `toon` | 72.3% | 115/159 |
+| `json` | 71.7% | 114/159 |
+| `markdown-kv` | 70.4% | 112/159 |
+| `csv` | 69.2% | 110/159 |
+| `yaml` | 68.6% | 109/159 |
+
+##### claude-haiku-4-5
+
+| Format | Accuracy | Correct/Total |
+|--------|----------|---------------|
+| `markdown-kv` | 76.7% | 122/159 |
+| `toon` | 75.5% | 120/159 |
+| `json` | 75.5% | 120/159 |
+| `csv` | 75.5% | 120/159 |
+| `yaml` | 74.8% | 119/159 |
+
+
+#### Methodology
+
+- **Semantic validation**: LLM-as-judge validates responses semantically (not exact string matching).
+- **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding.
+- **Question types**: Field retrieval, aggregation, and filtering tasks.
+- **Datasets**: Synthetic Faker.js-generated datasets + real GitHub repository data.
+
+
+
+
+
## Installation
```bash
diff --git a/benchmarks/.env.example b/benchmarks/.env.example
new file mode 100644
index 0000000..df70883
--- /dev/null
+++ b/benchmarks/.env.example
@@ -0,0 +1,3 @@
+OPENAI_API_KEY=
+ANTHROPIC_API_KEY=
+GOOGLE_GENERATIVE_AI_API_KEY=
diff --git a/benchmarks/data/github-repos.json b/benchmarks/data/github-repos.json
new file mode 100644
index 0000000..b7ed072
--- /dev/null
+++ b/benchmarks/data/github-repos.json
@@ -0,0 +1,1302 @@
+[
+ {
+ "id": 28457823,
+ "name": "freeCodeCamp",
+ "repo": "freeCodeCamp/freeCodeCamp",
+ "description": "freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming, and computer science for free.",
+ "createdAt": "2014-12-24T17:49:19Z",
+ "updatedAt": "2025-10-27T07:40:58Z",
+ "pushedAt": "2025-10-26T11:31:08Z",
+ "stars": 430828,
+ "watchers": 8582,
+ "forks": 42136,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 132750724,
+ "name": "build-your-own-x",
+ "repo": "codecrafters-io/build-your-own-x",
+ "description": "Master programming by recreating your favorite technologies from scratch.",
+ "createdAt": "2018-05-09T12:03:18Z",
+ "updatedAt": "2025-10-27T07:43:25Z",
+ "pushedAt": "2025-10-10T18:45:01Z",
+ "stars": 430102,
+ "watchers": 6322,
+ "forks": 40388,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 21737465,
+ "name": "awesome",
+ "repo": "sindresorhus/awesome",
+ "description": "😎 Awesome lists about all kinds of interesting topics",
+ "createdAt": "2014-07-11T13:42:37Z",
+ "updatedAt": "2025-10-27T07:44:27Z",
+ "pushedAt": "2025-10-23T17:26:53Z",
+ "stars": 409760,
+ "watchers": 8016,
+ "forks": 32015,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 13491895,
+ "name": "free-programming-books",
+ "repo": "EbookFoundation/free-programming-books",
+ "description": ":books: Freely available programming books",
+ "createdAt": "2013-10-11T06:50:37Z",
+ "updatedAt": "2025-10-27T07:36:14Z",
+ "pushedAt": "2025-10-26T23:24:34Z",
+ "stars": 375134,
+ "watchers": 9788,
+ "forks": 65149,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 54346799,
+ "name": "public-apis",
+ "repo": "public-apis/public-apis",
+ "description": "A collective list of free APIs",
+ "createdAt": "2016-03-20T23:49:42Z",
+ "updatedAt": "2025-10-27T07:45:41Z",
+ "pushedAt": "2025-05-20T15:56:34Z",
+ "stars": 373288,
+ "watchers": 4392,
+ "forks": 39386,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 85077558,
+ "name": "developer-roadmap",
+ "repo": "kamranahmedse/developer-roadmap",
+ "description": "Interactive roadmaps, guides and other educational content to help developers grow in their careers.",
+ "createdAt": "2017-03-15T13:45:52Z",
+ "updatedAt": "2025-10-27T07:26:36Z",
+ "pushedAt": "2025-10-24T10:20:46Z",
+ "stars": 342038,
+ "watchers": 6887,
+ "forks": 43222,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 60493101,
+ "name": "coding-interview-university",
+ "repo": "jwasham/coding-interview-university",
+ "description": "A complete computer science study plan to become a software engineer.",
+ "createdAt": "2016-06-06T02:34:12Z",
+ "updatedAt": "2025-10-27T07:46:31Z",
+ "pushedAt": "2025-08-28T14:42:47Z",
+ "stars": 331885,
+ "watchers": 8512,
+ "forks": 81046,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 83222441,
+ "name": "system-design-primer",
+ "repo": "donnemartin/system-design-primer",
+ "description": "Learn how to design large-scale systems. Prep for the system design interview. Includes Anki flashcards.",
+ "createdAt": "2017-02-26T16:15:28Z",
+ "updatedAt": "2025-10-27T07:38:55Z",
+ "pushedAt": "2025-05-21T11:13:33Z",
+ "stars": 324162,
+ "watchers": 6818,
+ "forks": 52866,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 177736533,
+ "name": "996.ICU",
+ "repo": "996icu/996.ICU",
+ "description": "Repo for counting stars and contributing. Press F to pay respect to glorious developers.",
+ "createdAt": "2019-03-26T07:31:14Z",
+ "updatedAt": "2025-10-27T07:35:11Z",
+ "pushedAt": "2025-08-22T06:01:29Z",
+ "stars": 274700,
+ "watchers": 4217,
+ "forks": 21033,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 21289110,
+ "name": "awesome-python",
+ "repo": "vinta/awesome-python",
+ "description": "An opinionated list of awesome Python frameworks, libraries, software and resources.",
+ "createdAt": "2014-06-27T21:00:06Z",
+ "updatedAt": "2025-10-27T07:40:04Z",
+ "pushedAt": "2025-10-16T13:40:58Z",
+ "stars": 266460,
+ "watchers": 6127,
+ "forks": 26579,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 36633370,
+ "name": "awesome-selfhosted",
+ "repo": "awesome-selfhosted/awesome-selfhosted",
+ "description": "A list of Free Software network services and web applications which can be hosted on your own servers",
+ "createdAt": "2015-06-01T02:33:17Z",
+ "updatedAt": "2025-10-27T07:43:02Z",
+ "pushedAt": "2025-10-23T10:47:33Z",
+ "stars": 254916,
+ "watchers": 2995,
+ "forks": 11798,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 88011908,
+ "name": "project-based-learning",
+ "repo": "practical-tutorials/project-based-learning",
+ "description": "Curated list of project-based tutorials",
+ "createdAt": "2017-04-12T05:07:46Z",
+ "updatedAt": "2025-10-27T07:45:41Z",
+ "pushedAt": "2024-08-15T05:33:54Z",
+ "stars": 247930,
+ "watchers": 3445,
+ "forks": 32413,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 10270250,
+ "name": "react",
+ "repo": "facebook/react",
+ "description": "The library for web and native user interfaces.",
+ "createdAt": "2013-05-24T16:15:54Z",
+ "updatedAt": "2025-10-27T06:47:16Z",
+ "pushedAt": "2025-10-24T22:08:43Z",
+ "stars": 240059,
+ "watchers": 6687,
+ "forks": 49664,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 63476337,
+ "name": "Python",
+ "repo": "TheAlgorithms/Python",
+ "description": "All Algorithms implemented in Python",
+ "createdAt": "2016-07-16T09:44:01Z",
+ "updatedAt": "2025-10-27T07:26:23Z",
+ "pushedAt": "2025-10-20T00:59:36Z",
+ "stars": 212044,
+ "watchers": 5975,
+ "forks": 48986,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 11730342,
+ "name": "vue",
+ "repo": "vuejs/vue",
+ "description": "This is the repo for Vue 2. For Vue 3, go to https://github.com/vuejs/core",
+ "createdAt": "2013-07-29T03:24:51Z",
+ "updatedAt": "2025-10-27T05:37:40Z",
+ "pushedAt": "2024-10-10T07:24:15Z",
+ "stars": 209624,
+ "watchers": 5787,
+ "forks": 33796,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 2325298,
+ "name": "linux",
+ "repo": "torvalds/linux",
+ "description": "Linux kernel source tree",
+ "createdAt": "2011-09-04T22:48:12Z",
+ "updatedAt": "2025-10-27T07:25:34Z",
+ "pushedAt": "2025-10-26T23:00:24Z",
+ "stars": 205761,
+ "watchers": 7739,
+ "forks": 58023,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 19415064,
+ "name": "computer-science",
+ "repo": "ossu/computer-science",
+ "description": "🎓 Path to a free self-taught education in Computer Science!",
+ "createdAt": "2014-05-04T00:18:39Z",
+ "updatedAt": "2025-10-27T07:25:53Z",
+ "pushedAt": "2025-08-23T18:48:52Z",
+ "stars": 196024,
+ "watchers": 5935,
+ "forks": 24465,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 126577260,
+ "name": "javascript-algorithms",
+ "repo": "trekhleb/javascript-algorithms",
+ "description": "📝 Algorithms and data structures implemented in JavaScript with explanations and links to further readings",
+ "createdAt": "2018-03-24T07:47:04Z",
+ "updatedAt": "2025-10-27T07:26:50Z",
+ "pushedAt": "2025-10-22T15:03:29Z",
+ "stars": 193648,
+ "watchers": 4267,
+ "forks": 30919,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 45717250,
+ "name": "tensorflow",
+ "repo": "tensorflow/tensorflow",
+ "description": "An Open Source Machine Learning Framework for Everyone",
+ "createdAt": "2015-11-07T01:19:20Z",
+ "updatedAt": "2025-10-27T07:33:01Z",
+ "pushedAt": "2025-10-27T06:15:29Z",
+ "stars": 192220,
+ "watchers": 7431,
+ "forks": 74928,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 138393139,
+ "name": "the-book-of-secret-knowledge",
+ "repo": "trimstray/the-book-of-secret-knowledge",
+ "description": "A collection of inspiring lists, manuals, cheatsheets, blogs, hacks, one-liners, cli/web tools and more.",
+ "createdAt": "2018-06-23T10:43:14Z",
+ "updatedAt": "2025-10-27T07:43:08Z",
+ "pushedAt": "2024-11-19T14:00:38Z",
+ "stars": 191315,
+ "watchers": 2679,
+ "forks": 11763,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 14440270,
+ "name": "You-Dont-Know-JS",
+ "repo": "getify/You-Dont-Know-JS",
+ "description": "A book series (2 published editions) on the JS language.",
+ "createdAt": "2013-11-16T02:37:24Z",
+ "updatedAt": "2025-10-27T07:25:47Z",
+ "pushedAt": "2025-05-20T14:22:36Z",
+ "stars": 183631,
+ "watchers": 5802,
+ "forks": 33668,
+ "defaultBranch": "2nd-ed"
+ },
+ {
+ "id": 121395510,
+ "name": "CS-Notes",
+ "repo": "CyC2018/CS-Notes",
+ "description": ":books: 技术面试必备基础知识、Leetcode、计算机操作系统、计算机网络、系统设计",
+ "createdAt": "2018-02-13T14:56:24Z",
+ "updatedAt": "2025-10-27T07:19:57Z",
+ "pushedAt": "2024-08-21T09:40:10Z",
+ "stars": 182646,
+ "watchers": 5252,
+ "forks": 51251,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 291137,
+ "name": "ohmyzsh",
+ "repo": "ohmyzsh/ohmyzsh",
+ "description": "🙃 A delightful community-driven (with 2,400+ contributors) framework for managing your zsh configuration. Includes 300+ optional plugins (rails, git, macOS, hub, docker, homebrew, node, php, python, etc), 140+ themes to spice up your morning, and an auto-update tool that makes it easy to keep up with the latest updates from the community.",
+ "createdAt": "2009-08-28T18:15:37Z",
+ "updatedAt": "2025-10-27T07:25:29Z",
+ "pushedAt": "2025-10-26T13:17:47Z",
+ "stars": 182297,
+ "watchers": 2618,
+ "forks": 26259,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 614765452,
+ "name": "AutoGPT",
+ "repo": "Significant-Gravitas/AutoGPT",
+ "description": "AutoGPT is the vision of accessible AI for everyone, to use and to build on. Our mission is to provide the tools, so that you can focus on what matters.",
+ "createdAt": "2023-03-16T09:21:07Z",
+ "updatedAt": "2025-10-27T07:34:44Z",
+ "pushedAt": "2025-10-27T00:10:36Z",
+ "stars": 179292,
+ "watchers": 1547,
+ "forks": 46077,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 41881900,
+ "name": "vscode",
+ "repo": "microsoft/vscode",
+ "description": "Visual Studio Code",
+ "createdAt": "2015-09-03T20:23:38Z",
+ "updatedAt": "2025-10-27T07:26:11Z",
+ "pushedAt": "2025-10-27T07:29:25Z",
+ "stars": 177925,
+ "watchers": 3364,
+ "forks": 35788,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 123458551,
+ "name": "Python-100-Days",
+ "repo": "jackfrued/Python-100-Days",
+ "description": "Python - 100天从新手到大师",
+ "createdAt": "2018-03-01T16:05:52Z",
+ "updatedAt": "2025-10-27T07:26:50Z",
+ "pushedAt": "2025-03-28T10:29:23Z",
+ "stars": 173752,
+ "watchers": 6098,
+ "forks": 54771,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 2126244,
+ "name": "bootstrap",
+ "repo": "twbs/bootstrap",
+ "description": "The most popular HTML, CSS, and JavaScript framework for developing responsive, mobile first projects on the web.",
+ "createdAt": "2011-07-29T21:19:00Z",
+ "updatedAt": "2025-10-27T07:25:34Z",
+ "pushedAt": "2025-10-26T18:41:31Z",
+ "stars": 173599,
+ "watchers": 6681,
+ "forks": 79156,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 31792824,
+ "name": "flutter",
+ "repo": "flutter/flutter",
+ "description": "Flutter makes it easy and fast to build beautiful apps for mobile and beyond",
+ "createdAt": "2015-03-06T22:54:58Z",
+ "updatedAt": "2025-10-27T07:31:00Z",
+ "pushedAt": "2025-10-27T05:33:32Z",
+ "stars": 173546,
+ "watchers": 3481,
+ "forks": 29414,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 1062897,
+ "name": "gitignore",
+ "repo": "github/gitignore",
+ "description": "A collection of useful .gitignore templates",
+ "createdAt": "2010-11-08T20:17:14Z",
+ "updatedAt": "2025-10-27T07:34:35Z",
+ "pushedAt": "2025-09-10T18:42:03Z",
+ "stars": 170298,
+ "watchers": 3366,
+ "forks": 82998,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 35955666,
+ "name": "the-art-of-command-line",
+ "repo": "jlevy/the-art-of-command-line",
+ "description": "Master the command line, in one page",
+ "createdAt": "2015-05-20T15:11:03Z",
+ "updatedAt": "2025-10-27T07:26:07Z",
+ "pushedAt": "2024-06-25T18:13:44Z",
+ "stars": 158582,
+ "watchers": 2812,
+ "forks": 14754,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 527591471,
+ "name": "stable-diffusion-webui",
+ "repo": "AUTOMATIC1111/stable-diffusion-webui",
+ "description": "Stable Diffusion web UI",
+ "createdAt": "2022-08-22T14:05:26Z",
+ "updatedAt": "2025-10-27T07:49:02Z",
+ "pushedAt": "2025-10-07T20:06:10Z",
+ "stars": 157565,
+ "watchers": 1154,
+ "forks": 29246,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 21540759,
+ "name": "awesome-go",
+ "repo": "avelino/awesome-go",
+ "description": "A curated list of awesome Go frameworks, libraries and software",
+ "createdAt": "2014-07-06T13:42:15Z",
+ "updatedAt": "2025-10-27T07:49:36Z",
+ "pushedAt": "2025-10-22T12:15:14Z",
+ "stars": 155801,
+ "watchers": 2818,
+ "forks": 12706,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 658928958,
+ "name": "ollama",
+ "repo": "ollama/ollama",
+ "description": "Get up and running with OpenAI gpt-oss, DeepSeek-R1, Gemma 3 and other models.",
+ "createdAt": "2023-06-26T19:39:32Z",
+ "updatedAt": "2025-10-27T07:43:05Z",
+ "pushedAt": "2025-10-27T01:25:05Z",
+ "stars": 154808,
+ "watchers": 877,
+ "forks": 13467,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 233472199,
+ "name": "Microsoft-Activation-Scripts",
+ "repo": "massgravel/Microsoft-Activation-Scripts",
+ "description": "Open-source Windows and Office activator featuring HWID, Ohook, TSforge, KMS38, and Online KMS activation methods, along with advanced troubleshooting.",
+ "createdAt": "2020-01-12T23:03:34Z",
+ "updatedAt": "2025-10-27T07:44:35Z",
+ "pushedAt": "2025-09-30T22:22:59Z",
+ "stars": 153864,
+ "watchers": 1319,
+ "forks": 14861,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 132464395,
+ "name": "JavaGuide",
+ "repo": "Snailclimb/JavaGuide",
+ "description": "ใJavaๅญฆไน +้ข่ฏๆๅใไธไปฝๆถต็ๅคง้จๅ Java ็จๅบๅๆ้่ฆๆๆก็ๆ ธๅฟ็ฅ่ฏใๅๅค Java ้ข่ฏ๏ผ้ฆ้ JavaGuide๏ผ",
+ "createdAt": "2018-05-07T13:27:00Z",
+ "updatedAt": "2025-10-27T07:25:13Z",
+ "pushedAt": "2025-10-20T08:53:33Z",
+ "stars": 152308,
+ "watchers": 4470,
+ "forks": 46020,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 193215554,
+ "name": "n8n",
+ "repo": "n8n-io/n8n",
+ "description": "Fair-code workflow automation platform with native AI capabilities. Combine visual building with custom code, self-host or cloud, 400+ integrations.",
+ "createdAt": "2019-06-22T09:24:21Z",
+ "updatedAt": "2025-10-27T07:48:50Z",
+ "pushedAt": "2025-10-27T07:12:52Z",
+ "stars": 151975,
+ "watchers": 880,
+ "forks": 48459,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 155220641,
+ "name": "transformers",
+ "repo": "huggingface/transformers",
+ "description": "🤗 Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training. ",
+ "createdAt": "2018-10-29T13:56:00Z",
+ "updatedAt": "2025-10-27T07:45:24Z",
+ "pushedAt": "2025-10-25T16:31:22Z",
+ "stars": 151659,
+ "watchers": 1166,
+ "forks": 30955,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 6498492,
+ "name": "javascript",
+ "repo": "airbnb/javascript",
+ "description": "JavaScript Style Guide",
+ "createdAt": "2012-11-01T23:13:50Z",
+ "updatedAt": "2025-10-27T06:50:33Z",
+ "pushedAt": "2025-09-17T18:12:44Z",
+ "stars": 147687,
+ "watchers": 3705,
+ "forks": 26797,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 1039520,
+ "name": "youtube-dl",
+ "repo": "ytdl-org/youtube-dl",
+ "description": "Command-line program to download videos from YouTube.com and other video sites",
+ "createdAt": "2010-10-31T14:35:07Z",
+ "updatedAt": "2025-10-27T07:30:15Z",
+ "pushedAt": "2025-10-18T10:02:28Z",
+ "stars": 138545,
+ "watchers": 2160,
+ "forks": 10527,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 574523116,
+ "name": "awesome-chatgpt-prompts",
+ "repo": "f/awesome-chatgpt-prompts",
+ "description": "This repo includes ChatGPT prompt curation to use ChatGPT and other LLM tools better.",
+ "createdAt": "2022-12-05T13:54:13Z",
+ "updatedAt": "2025-10-27T07:42:24Z",
+ "pushedAt": "2025-10-14T17:23:13Z",
+ "stars": 135794,
+ "watchers": 1562,
+ "forks": 18073,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 70107786,
+ "name": "next.js",
+ "repo": "vercel/next.js",
+ "description": "The React Framework",
+ "createdAt": "2016-10-05T23:32:51Z",
+ "updatedAt": "2025-10-27T07:38:47Z",
+ "pushedAt": "2025-10-27T07:02:37Z",
+ "stars": 135306,
+ "watchers": 1497,
+ "forks": 29680,
+ "defaultBranch": "canary"
+ },
+ {
+ "id": 599320067,
+ "name": "langflow",
+ "repo": "langflow-ai/langflow",
+ "description": "Langflow is a powerful tool for building and deploying AI-powered agents and workflows.",
+ "createdAt": "2023-02-08T22:28:03Z",
+ "updatedAt": "2025-10-27T07:22:05Z",
+ "pushedAt": "2025-10-27T00:28:51Z",
+ "stars": 134904,
+ "watchers": 453,
+ "forks": 7853,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 307260205,
+ "name": "yt-dlp",
+ "repo": "yt-dlp/yt-dlp",
+ "description": "A feature-rich command-line audio/video downloader",
+ "createdAt": "2020-10-26T04:22:55Z",
+ "updatedAt": "2025-10-27T07:35:17Z",
+ "pushedAt": "2025-10-25T22:47:00Z",
+ "stars": 132793,
+ "watchers": 675,
+ "forks": 10659,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 58028038,
+ "name": "HelloGitHub",
+ "repo": "521xueweihan/HelloGitHub",
+ "description": ":octocat: 分享 GitHub 上有趣、入门级的开源项目。Share interesting, entry-level open source projects on GitHub.",
+ "createdAt": "2016-05-04T06:24:11Z",
+ "updatedAt": "2025-10-27T07:49:37Z",
+ "pushedAt": "2025-09-28T02:00:22Z",
+ "stars": 132228,
+ "watchers": 4182,
+ "forks": 10822,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 62607227,
+ "name": "tech-interview-handbook",
+ "repo": "yangshun/tech-interview-handbook",
+ "description": "💯 Curated coding interview preparation materials for busy software engineers",
+ "createdAt": "2016-07-05T05:00:48Z",
+ "updatedAt": "2025-10-27T07:26:22Z",
+ "pushedAt": "2025-08-27T00:17:33Z",
+ "stars": 131399,
+ "watchers": 2182,
+ "forks": 15942,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 23096959,
+ "name": "go",
+ "repo": "golang/go",
+ "description": "The Go programming language",
+ "createdAt": "2014-08-19T04:33:40Z",
+ "updatedAt": "2025-10-27T07:25:58Z",
+ "pushedAt": "2025-10-27T04:49:52Z",
+ "stars": 130538,
+ "watchers": 3346,
+ "forks": 18415,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 111583593,
+ "name": "scrcpy",
+ "repo": "Genymobile/scrcpy",
+ "description": "Display and control your Android device",
+ "createdAt": "2017-11-21T18:00:27Z",
+ "updatedAt": "2025-10-27T07:30:24Z",
+ "pushedAt": "2025-10-26T10:52:03Z",
+ "stars": 130238,
+ "watchers": 1321,
+ "forks": 12191,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 241576270,
+ "name": "fucking-algorithm",
+ "repo": "labuladong/fucking-algorithm",
+ "description": "刷算法全靠套路，认准 labuladong 就够了！English version supported! Crack LeetCode, not only how, but also why. ",
+ "createdAt": "2020-02-19T09:01:23Z",
+ "updatedAt": "2025-10-27T07:27:20Z",
+ "pushedAt": "2025-10-08T04:06:00Z",
+ "stars": 129651,
+ "watchers": 2283,
+ "forks": 23450,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 112507086,
+ "name": "30-seconds-of-code",
+ "repo": "Chalarangelo/30-seconds-of-code",
+ "description": "Coding articles to level up your development skills",
+ "createdAt": "2017-11-29T17:35:03Z",
+ "updatedAt": "2025-10-27T07:26:47Z",
+ "pushedAt": "2025-10-22T12:51:11Z",
+ "stars": 125630,
+ "watchers": 2594,
+ "forks": 12358,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 184456251,
+ "name": "PowerToys",
+ "repo": "microsoft/PowerToys",
+ "description": "Microsoft PowerToys is a collection of utilities that help you customize Windows and streamline everyday tasks",
+ "createdAt": "2019-05-01T17:44:02Z",
+ "updatedAt": "2025-10-27T07:50:46Z",
+ "pushedAt": "2025-10-27T02:44:52Z",
+ "stars": 125223,
+ "watchers": 1164,
+ "forks": 7451,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 29028775,
+ "name": "react-native",
+ "repo": "facebook/react-native",
+ "description": "A framework for building native applications using React",
+ "createdAt": "2015-01-09T18:10:16Z",
+ "updatedAt": "2025-10-27T07:20:37Z",
+ "pushedAt": "2025-10-27T06:53:57Z",
+ "stars": 124320,
+ "watchers": 3563,
+ "forks": 24914,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 9384267,
+ "name": "electron",
+ "repo": "electron/electron",
+ "description": ":electron: Build cross-platform desktop apps with JavaScript, HTML, and CSS",
+ "createdAt": "2013-04-12T01:47:36Z",
+ "updatedAt": "2025-10-27T07:25:42Z",
+ "pushedAt": "2025-10-27T06:46:57Z",
+ "stars": 118841,
+ "watchers": 2801,
+ "forks": 16578,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 20580498,
+ "name": "kubernetes",
+ "repo": "kubernetes/kubernetes",
+ "description": "Production-Grade Container Scheduling and Management",
+ "createdAt": "2014-06-06T22:56:04Z",
+ "updatedAt": "2025-10-27T07:31:13Z",
+ "pushedAt": "2025-10-26T22:21:34Z",
+ "stars": 118226,
+ "watchers": 3189,
+ "forks": 41578,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 552661142,
+ "name": "langchain",
+ "repo": "langchain-ai/langchain",
+ "description": "🦜🔗 Build context-aware reasoning applications",
+ "createdAt": "2022-10-17T02:58:36Z",
+ "updatedAt": "2025-10-27T07:37:09Z",
+ "pushedAt": "2025-10-27T07:39:14Z",
+ "stars": 118140,
+ "watchers": 775,
+ "forks": 19453,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 561730219,
+ "name": "hello-algo",
+ "repo": "krahets/hello-algo",
+ "description": "《Hello 算法》：动画图解、一键运行的数据结构与算法教程。支持 Python, Java, C++, C, C#, JS, Go, Swift, Rust, Ruby, Kotlin, TS, Dart 代码。简体版和繁体版同步更新，English version in translation",
+ "createdAt": "2022-11-04T11:08:34Z",
+ "updatedAt": "2025-10-27T07:28:05Z",
+ "pushedAt": "2025-10-16T21:33:36Z",
+ "stars": 118081,
+ "watchers": 582,
+ "forks": 14500,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 626805178,
+ "name": "dify",
+ "repo": "langgenius/dify",
+ "description": "Production-ready platform for agentic workflow development.",
+ "createdAt": "2023-04-12T07:40:24Z",
+ "updatedAt": "2025-10-27T07:45:31Z",
+ "pushedAt": "2025-10-27T07:48:43Z",
+ "stars": 117359,
+ "watchers": 697,
+ "forks": 18125,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 14098069,
+ "name": "free-programming-books-zh_CN",
+ "repo": "justjavac/free-programming-books-zh_CN",
+ "description": ":books: 免费的计算机编程类中文书籍，欢迎投稿",
+ "createdAt": "2013-11-04T01:59:19Z",
+ "updatedAt": "2025-10-27T07:25:46Z",
+ "pushedAt": "2024-07-15T08:55:20Z",
+ "stars": 115537,
+ "watchers": 5860,
+ "forks": 28362,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 32484381,
+ "name": "free-for-dev",
+ "repo": "ripienaar/free-for-dev",
+ "description": "A list of SaaS, PaaS and IaaS offerings that have free tiers of interest to devops and infradev",
+ "createdAt": "2015-03-18T21:06:26Z",
+ "updatedAt": "2025-10-27T07:26:05Z",
+ "pushedAt": "2025-10-23T04:49:00Z",
+ "stars": 114093,
+ "watchers": 1734,
+ "forks": 11683,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 27193779,
+ "name": "node",
+ "repo": "nodejs/node",
+ "description": "Node.js JavaScript runtime ✨🐢🚀✨",
+ "createdAt": "2014-11-26T19:57:11Z",
+ "updatedAt": "2025-10-27T07:38:07Z",
+ "pushedAt": "2025-10-27T01:02:07Z",
+ "stars": 113974,
+ "watchers": 2964,
+ "forks": 33571,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 701547123,
+ "name": "open-webui",
+ "repo": "open-webui/open-webui",
+ "description": "User-friendly AI Interface (Supports Ollama, OpenAI API, ...)",
+ "createdAt": "2023-10-06T22:08:27Z",
+ "updatedAt": "2025-10-27T07:32:58Z",
+ "pushedAt": "2025-10-27T05:20:59Z",
+ "stars": 113474,
+ "watchers": 516,
+ "forks": 15764,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 943149,
+ "name": "d3",
+ "repo": "d3/d3",
+ "description": "Bring data to life with SVG, Canvas and HTML. :bar_chart::chart_with_upwards_trend::tada:",
+ "createdAt": "2010-09-27T17:22:42Z",
+ "updatedAt": "2025-10-27T07:25:31Z",
+ "pushedAt": "2025-07-27T11:30:40Z",
+ "stars": 111683,
+ "watchers": 3558,
+ "forks": 22851,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 808144141,
+ "name": "FreeDomain",
+ "repo": "DigitalPlatDev/FreeDomain",
+ "description": "DigitalPlat FreeDomain: Free Domain For Everyone",
+ "createdAt": "2024-05-30T13:23:00Z",
+ "updatedAt": "2025-10-27T07:49:47Z",
+ "pushedAt": "2025-09-25T12:12:01Z",
+ "stars": 111350,
+ "watchers": 120,
+ "forks": 2066,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 231283452,
+ "name": "excalidraw",
+ "repo": "excalidraw/excalidraw",
+ "description": "Virtual whiteboard for sketching hand-drawn like diagrams",
+ "createdAt": "2020-01-02T01:04:43Z",
+ "updatedAt": "2025-10-27T07:49:00Z",
+ "pushedAt": "2025-10-27T06:42:25Z",
+ "stars": 109225,
+ "watchers": 467,
+ "forks": 11332,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 576201,
+ "name": "three.js",
+ "repo": "mrdoob/three.js",
+ "description": "JavaScript 3D Library.",
+ "createdAt": "2010-03-23T18:58:01Z",
+ "updatedAt": "2025-10-27T07:25:30Z",
+ "pushedAt": "2025-10-26T17:25:47Z",
+ "stars": 109123,
+ "watchers": 2517,
+ "forks": 36051,
+ "defaultBranch": "dev"
+ },
+ {
+ "id": 23088740,
+ "name": "axios",
+ "repo": "axios/axios",
+ "description": "Promise based HTTP client for the browser and node.js",
+ "createdAt": "2014-08-18T22:30:27Z",
+ "updatedAt": "2025-10-27T05:22:18Z",
+ "pushedAt": "2025-10-26T22:46:40Z",
+ "stars": 108017,
+ "watchers": 1169,
+ "forks": 11366,
+ "defaultBranch": "v1.x"
+ },
+ {
+ "id": 724712,
+ "name": "rust",
+ "repo": "rust-lang/rust",
+ "description": "Empowering everyone to build reliable and efficient software.",
+ "createdAt": "2010-06-16T20:39:03Z",
+ "updatedAt": "2025-10-27T06:39:34Z",
+ "pushedAt": "2025-10-27T07:25:41Z",
+ "stars": 107453,
+ "watchers": 1467,
+ "forks": 13897,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 20929025,
+ "name": "TypeScript",
+ "repo": "microsoft/TypeScript",
+ "description": "TypeScript is a superset of JavaScript that compiles to clean JavaScript output.",
+ "createdAt": "2014-06-17T15:28:39Z",
+ "updatedAt": "2025-10-27T07:20:39Z",
+ "pushedAt": "2025-10-27T00:06:54Z",
+ "stars": 106530,
+ "watchers": 2148,
+ "forks": 13084,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 133442384,
+ "name": "deno",
+ "repo": "denoland/deno",
+ "description": "A modern runtime for JavaScript and TypeScript.",
+ "createdAt": "2018-05-15T01:34:26Z",
+ "updatedAt": "2025-10-27T07:14:57Z",
+ "pushedAt": "2025-10-24T23:41:20Z",
+ "stars": 104915,
+ "watchers": 1398,
+ "forks": 5753,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 103633984,
+ "name": "nodebestpractices",
+ "repo": "goldbergyoni/nodebestpractices",
+ "description": ":white_check_mark: The Node.js best practices list (July 2024)",
+ "createdAt": "2017-09-15T08:33:19Z",
+ "updatedAt": "2025-10-27T07:26:43Z",
+ "pushedAt": "2025-04-15T21:52:42Z",
+ "stars": 104439,
+ "watchers": 1944,
+ "forks": 10627,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 63537249,
+ "name": "create-react-app",
+ "repo": "facebook/create-react-app",
+ "description": "Set up a modern web app by running one command.",
+ "createdAt": "2016-07-17T14:55:11Z",
+ "updatedAt": "2025-10-27T07:26:24Z",
+ "pushedAt": "2025-02-15T01:32:11Z",
+ "stars": 103811,
+ "watchers": 1892,
+ "forks": 27146,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 206462776,
+ "name": "GitHub-Chinese-Top-Charts",
+ "repo": "GrowingGit/GitHub-Chinese-Top-Charts",
+ "description": ":cn: GitHubไธญๆๆ่กๆฆ๏ผๅ่ฏญ่จๅ่ฎพใ่ฝฏไปถ | ่ตๆใๆฆๅ๏ผ็ฒพๅๅฎไฝไธญๆๅฅฝ้กน็ฎใๅๅๆ้๏ผ้ซๆๅญฆไน ใ",
+ "createdAt": "2019-09-05T03:01:56Z",
+ "updatedAt": "2025-10-27T06:04:01Z",
+ "pushedAt": "2024-10-12T06:51:36Z",
+ "stars": 103336,
+ "watchers": 2607,
+ "forks": 13364,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 15634981,
+ "name": "godot",
+ "repo": "godotengine/godot",
+ "description": "Godot Engine – Multi-platform 2D and 3D game engine",
+ "createdAt": "2014-01-04T16:05:36Z",
+ "updatedAt": "2025-10-27T07:16:51Z",
+ "pushedAt": "2025-10-25T20:48:20Z",
+ "stars": 102604,
+ "watchers": 1493,
+ "forks": 23450,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 299354207,
+ "name": "rustdesk",
+ "repo": "rustdesk/rustdesk",
+ "description": "An open-source remote desktop application designed for self-hosting, as an alternative to TeamViewer.",
+ "createdAt": "2020-09-28T15:36:08Z",
+ "updatedAt": "2025-10-27T07:42:29Z",
+ "pushedAt": "2025-10-26T13:28:57Z",
+ "stars": 101456,
+ "watchers": 548,
+ "forks": 14837,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 655806940,
+ "name": "generative-ai-for-beginners",
+ "repo": "microsoft/generative-ai-for-beginners",
+ "description": "21 Lessons, Get Started Building with Generative AI ",
+ "createdAt": "2023-06-19T16:28:59Z",
+ "updatedAt": "2025-10-27T07:38:12Z",
+ "pushedAt": "2025-10-27T03:19:39Z",
+ "stars": 100935,
+ "watchers": 889,
+ "forks": 53478,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 100060912,
+ "name": "terminal",
+ "repo": "microsoft/terminal",
+ "description": "The new Windows Terminal and the original Windows console host, all in the same place!",
+ "createdAt": "2017-08-11T18:38:22Z",
+ "updatedAt": "2025-10-27T05:40:24Z",
+ "pushedAt": "2025-10-22T01:31:33Z",
+ "stars": 100726,
+ "watchers": 1334,
+ "forks": 8879,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 48378947,
+ "name": "frp",
+ "repo": "fatedier/frp",
+ "description": "A fast reverse proxy to help you expose a local server behind a NAT or firewall to the internet.",
+ "createdAt": "2015-12-21T15:24:59Z",
+ "updatedAt": "2025-10-27T07:00:25Z",
+ "pushedAt": "2025-10-17T02:53:43Z",
+ "stars": 100015,
+ "watchers": 1563,
+ "forks": 14562,
+ "defaultBranch": "dev"
+ },
+ {
+ "id": 908531752,
+ "name": "DeepSeek-V3",
+ "repo": "deepseek-ai/DeepSeek-V3",
+ "description": null,
+ "createdAt": "2024-12-26T09:52:40Z",
+ "updatedAt": "2025-10-27T07:28:30Z",
+ "pushedAt": "2025-08-28T03:24:37Z",
+ "stars": 99981,
+ "watchers": 750,
+ "forks": 16309,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 55076063,
+ "name": "Awesome-Hacking",
+ "repo": "Hack-with-Github/Awesome-Hacking",
+ "description": "A collection of various awesome lists for hackers, pentesters and security researchers",
+ "createdAt": "2016-03-30T15:47:10Z",
+ "updatedAt": "2025-10-27T07:49:40Z",
+ "pushedAt": "2025-01-18T01:48:02Z",
+ "stars": 99684,
+ "watchers": 3931,
+ "forks": 9634,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 15204860,
+ "name": "papers-we-love",
+ "repo": "papers-we-love/papers-we-love",
+ "description": "Papers from the computer science community to read and discuss.",
+ "createdAt": "2013-12-15T14:31:41Z",
+ "updatedAt": "2025-10-27T07:49:42Z",
+ "pushedAt": "2025-10-10T15:35:14Z",
+ "stars": 99626,
+ "watchers": 3159,
+ "forks": 6144,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 24195339,
+ "name": "angular",
+ "repo": "angular/angular",
+ "description": "Deliver web apps with confidence 🚀",
+ "createdAt": "2014-09-18T16:12:01Z",
+ "updatedAt": "2025-10-27T07:05:22Z",
+ "pushedAt": "2025-10-24T19:28:33Z",
+ "stars": 99167,
+ "watchers": 2980,
+ "forks": 26724,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 585146387,
+ "name": "ui",
+ "repo": "shadcn-ui/ui",
+ "description": "A set of beautifully-designed, accessible components and a code distribution platform. Works with your favorite frameworks. Open Source. Open Code.",
+ "createdAt": "2023-01-04T12:43:27Z",
+ "updatedAt": "2025-10-27T07:34:00Z",
+ "pushedAt": "2025-10-27T07:18:39Z",
+ "stars": 98464,
+ "watchers": 306,
+ "forks": 7031,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 196701619,
+ "name": "tauri",
+ "repo": "tauri-apps/tauri",
+ "description": "Build smaller, faster, and more secure desktop and mobile applications with a web frontend.",
+ "createdAt": "2019-07-13T09:09:37Z",
+ "updatedAt": "2025-10-27T07:27:10Z",
+ "pushedAt": "2025-10-26T13:55:16Z",
+ "stars": 98199,
+ "watchers": 530,
+ "forks": 3133,
+ "defaultBranch": "dev"
+ },
+ {
+ "id": 157616880,
+ "name": "iptv",
+ "repo": "iptv-org/iptv",
+ "description": "Collection of publicly available IPTV channels from all over the world",
+ "createdAt": "2018-11-14T22:00:57Z",
+ "updatedAt": "2025-10-27T07:13:48Z",
+ "pushedAt": "2025-10-27T00:13:17Z",
+ "stars": 98051,
+ "watchers": 1950,
+ "forks": 4195,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 23083156,
+ "name": "material-ui",
+ "repo": "mui/material-ui",
+ "description": "Material UI: Comprehensive React component library that implements Google's Material Design. Free forever.",
+ "createdAt": "2014-08-18T19:11:54Z",
+ "updatedAt": "2025-10-27T07:25:58Z",
+ "pushedAt": "2025-10-27T07:11:45Z",
+ "stars": 96875,
+ "watchers": 1312,
+ "forks": 32696,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 34526884,
+ "name": "ant-design",
+ "repo": "ant-design/ant-design",
+ "description": "An enterprise-class UI design language and React UI library",
+ "createdAt": "2015-04-24T15:37:24Z",
+ "updatedAt": "2025-10-27T07:19:39Z",
+ "pushedAt": "2025-10-27T07:44:37Z",
+ "stars": 96467,
+ "watchers": 236,
+ "forks": 53873,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 243950408,
+ "name": "HowToCook",
+ "repo": "Anduin2017/HowToCook",
+ "description": "程序员在家做饭方法指南。Programmer's guide about how to cook at home (Simplified Chinese only).",
+ "createdAt": "2020-02-29T10:43:49Z",
+ "updatedAt": "2025-10-27T07:31:17Z",
+ "pushedAt": "2025-10-23T12:40:47Z",
+ "stars": 95393,
+ "watchers": 488,
+ "forks": 10650,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 33614304,
+ "name": "thefuck",
+ "repo": "nvbn/thefuck",
+ "description": "Magnificent app which corrects your previous console command.",
+ "createdAt": "2015-04-08T15:08:04Z",
+ "updatedAt": "2025-10-27T07:26:06Z",
+ "pushedAt": "2024-07-19T14:56:13Z",
+ "stars": 94482,
+ "watchers": 825,
+ "forks": 3792,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 65600975,
+ "name": "pytorch",
+ "repo": "pytorch/pytorch",
+ "description": "Tensors and Dynamic neural networks in Python with strong GPU acceleration",
+ "createdAt": "2016-08-13T05:26:41Z",
+ "updatedAt": "2025-10-27T07:51:08Z",
+ "pushedAt": "2025-10-27T07:51:03Z",
+ "stars": 94273,
+ "watchers": 1771,
+ "forks": 25671,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 74791366,
+ "name": "clean-code-javascript",
+ "repo": "ryanmcdermott/clean-code-javascript",
+ "description": "Clean Code concepts adapted for JavaScript",
+ "createdAt": "2016-11-25T22:25:41Z",
+ "updatedAt": "2025-10-27T03:36:56Z",
+ "pushedAt": "2024-07-29T07:24:37Z",
+ "stars": 93960,
+ "watchers": 1744,
+ "forks": 12496,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 101296881,
+ "name": "every-programmer-should-know",
+ "repo": "mtdvio/every-programmer-should-know",
+ "description": "A collection of (mostly) technical things every software developer should know about",
+ "createdAt": "2017-08-24T13:18:26Z",
+ "updatedAt": "2025-10-27T07:26:42Z",
+ "pushedAt": "2025-10-22T15:21:18Z",
+ "stars": 93814,
+ "watchers": 2011,
+ "forks": 8436,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 16408992,
+ "name": "neovim",
+ "repo": "neovim/neovim",
+ "description": "Vim-fork focused on extensibility and usability",
+ "createdAt": "2014-01-31T13:39:22Z",
+ "updatedAt": "2025-10-27T07:30:43Z",
+ "pushedAt": "2025-10-27T05:15:23Z",
+ "stars": 93731,
+ "watchers": 971,
+ "forks": 6378,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 943398999,
+ "name": "system-prompts-and-models-of-ai-tools",
+ "repo": "x1xhlol/system-prompts-and-models-of-ai-tools",
+ "description": "FULL Augment Code, Claude Code, Cluely, CodeBuddy, Comet, Cursor, Devin AI, Junie, Kiro, Leap.new, Lovable, Manus Agent Tools, NotionAI, Orchids.app, Perplexity, Poke, Qoder, Replit, Same.dev, Trae, Traycer AI, VSCode Agent, Warp.dev, Windsurf, Xcode, Z.ai Code, dia & v0. (And other Open Sourced) System Prompts, Internal Tools & AI Models",
+ "createdAt": "2025-03-05T16:38:29Z",
+ "updatedAt": "2025-10-27T07:37:40Z",
+ "pushedAt": "2025-10-19T18:44:24Z",
+ "stars": 93282,
+ "watchers": 1183,
+ "forks": 25228,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 22790488,
+ "name": "java-design-patterns",
+ "repo": "iluwatar/java-design-patterns",
+ "description": "Design patterns implemented in Java",
+ "createdAt": "2014-08-09T16:45:18Z",
+ "updatedAt": "2025-10-27T07:35:54Z",
+ "pushedAt": "2025-10-21T21:30:34Z",
+ "stars": 93215,
+ "watchers": 3717,
+ "forks": 27309,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 90796663,
+ "name": "puppeteer",
+ "repo": "puppeteer/puppeteer",
+ "description": "JavaScript API for Chrome and Firefox",
+ "createdAt": "2017-05-09T22:16:13Z",
+ "updatedAt": "2025-10-27T07:31:12Z",
+ "pushedAt": "2025-10-26T04:03:55Z",
+ "stars": 92724,
+ "watchers": 1184,
+ "forks": 9314,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 311525798,
+ "name": "Web-Dev-For-Beginners",
+ "repo": "microsoft/Web-Dev-For-Beginners",
+ "description": "24 Lessons, 12 Weeks, Get Started as a Web Developer",
+ "createdAt": "2020-11-10T02:44:00Z",
+ "updatedAt": "2025-10-27T07:27:35Z",
+ "pushedAt": "2025-10-25T00:47:36Z",
+ "stars": 92476,
+ "watchers": 2690,
+ "forks": 14330,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 589831718,
+ "name": "ComfyUI",
+ "repo": "comfyanonymous/ComfyUI",
+ "description": "The most powerful and modular diffusion model GUI, api and backend with a graph/nodes interface.",
+ "createdAt": "2023-01-17T03:15:56Z",
+ "updatedAt": "2025-10-27T07:46:53Z",
+ "pushedAt": "2025-10-27T00:23:05Z",
+ "stars": 92036,
+ "watchers": 614,
+ "forks": 10341,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 63539055,
+ "name": "awesome-mac",
+ "repo": "jaywcjlove/awesome-mac",
+ "description": " Now we have become very big, Different from the original idea. Collect premium software in various categories.",
+ "createdAt": "2016-07-17T15:33:47Z",
+ "updatedAt": "2025-10-27T07:50:40Z",
+ "pushedAt": "2025-10-25T04:02:03Z",
+ "stars": 91815,
+ "watchers": 1517,
+ "forks": 6947,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 919443098,
+ "name": "DeepSeek-R1",
+ "repo": "deepseek-ai/DeepSeek-R1",
+ "description": null,
+ "createdAt": "2025-01-20T11:57:28Z",
+ "updatedAt": "2025-10-27T06:56:07Z",
+ "pushedAt": "2025-06-27T08:35:54Z",
+ "stars": 91380,
+ "watchers": 607,
+ "forks": 11766,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 160919119,
+ "name": "fastapi",
+ "repo": "fastapi/fastapi",
+ "description": "FastAPI framework, high performance, easy to learn, fast to code, ready for production",
+ "createdAt": "2018-12-08T08:21:47Z",
+ "updatedAt": "2025-10-27T07:49:54Z",
+ "pushedAt": "2025-10-23T20:55:59Z",
+ "stars": 91203,
+ "watchers": 723,
+ "forks": 8123,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 106017343,
+ "name": "tailwindcss",
+ "repo": "tailwindlabs/tailwindcss",
+ "description": "A utility-first CSS framework for rapid UI development.",
+ "createdAt": "2017-10-06T14:59:14Z",
+ "updatedAt": "2025-10-27T07:48:03Z",
+ "pushedAt": "2025-10-24T11:53:16Z",
+ "stars": 90800,
+ "watchers": 615,
+ "forks": 4771,
+ "defaultBranch": "main"
+ }
+]
diff --git a/benchmarks/package.json b/benchmarks/package.json
new file mode 100644
index 0000000..b3c7f71
--- /dev/null
+++ b/benchmarks/package.json
@@ -0,0 +1,26 @@
+{
+ "name": "@toon/benchmarks",
+ "type": "module",
+ "private": true,
+ "scripts": {
+ "benchmark:token-efficiency": "tsx scripts/token-efficiency-benchmark.ts",
+ "benchmark:accuracy": "tsx --env-file=.env scripts/accuracy-benchmark.ts",
+ "fetch-github-data": "tsx scripts/fetch-github-data.ts",
+ "test": "vitest"
+ },
+ "devDependencies": {
+ "@ai-sdk/anthropic": "^2.0.37",
+ "@ai-sdk/google": "^2.0.23",
+ "@ai-sdk/openai": "^2.0.53",
+ "@ai-sdk/provider": "^2.0.0",
+ "@antfu/eslint-config": "^6.1.0",
+ "@faker-js/faker": "^10.1.0",
+ "ai": "^5.0.80",
+ "consola": "^3.4.2",
+ "csv-stringify": "^6.6.0",
+ "gpt-tokenizer": "^3.2.0",
+ "ofetch": "^1.4.1",
+ "p-map": "^7.0.3",
+ "yaml": "^2.8.1"
+ }
+}
diff --git a/benchmarks/results/accuracy/accuracy.md b/benchmarks/results/accuracy/accuracy.md
new file mode 100644
index 0000000..e435df6
--- /dev/null
+++ b/benchmarks/results/accuracy/accuracy.md
@@ -0,0 +1,96 @@
+### Retrieval Accuracy
+
+Tested across **2 LLMs** with data retrieval tasks:
+
+```
+gpt-4o-mini      ██████████████░░░░░░ 72.3% accuracy
+claude-haiku-4-5 ███████████████░░░░░ 76.7% accuracy
+```
+
+**TOON achieves 73.9% accuracy (vs JSON's 73.6%) while using 46.3% fewer tokens.**
+
+| Format | Accuracy | Average Tokens |
+| ------ | -------- | -------------- |
+| `toon` | 73.9% | 4,678 |
+| `json` | 73.6% | 8,713 |
+| `markdown-kv` | 73.6% | 8,649 |
+| `csv` | 72.3% | 4,745 |
+| `yaml` | 71.7% | 7,091 |
+
+
+View detailed breakdown by dataset and model
+
+#### Performance by Dataset
+
+##### Uniform employee records (TOON optimal format)
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 72.4% | 2,483 | 84/116 |
+| `csv` | 69.0% | 2,337 | 80/116 |
+| `yaml` | 68.1% | 4,969 | 79/116 |
+| `markdown-kv` | 68.1% | 6,270 | 79/116 |
+| `json` | 68.1% | 6,347 | 79/116 |
+
+##### E-commerce orders with nested structures
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 84.1% | 5,967 | 74/88 |
+| `csv` | 83.0% | 6,735 | 73/88 |
+| `yaml` | 81.8% | 7,328 | 72/88 |
+| `markdown-kv` | 86.4% | 9,110 | 76/88 |
+| `json` | 84.1% | 9,694 | 74/88 |
+
+##### Time-series analytics data
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `csv` | 72.4% | 1,393 | 42/58 |
+| `toon` | 70.7% | 1,515 | 41/58 |
+| `yaml` | 72.4% | 2,938 | 42/58 |
+| `json` | 74.1% | 3,665 | 43/58 |
+| `markdown-kv` | 70.7% | 3,779 | 41/58 |
+
+##### Popular GitHub repositories
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 64.3% | 8,745 | 36/56 |
+| `csv` | 62.5% | 8,513 | 35/56 |
+| `json` | 67.9% | 15,145 | 38/56 |
+| `markdown-kv` | 67.9% | 15,436 | 38/56 |
+| `yaml` | 62.5% | 13,129 | 35/56 |
+
+
+#### Performance by Model
+
+##### gpt-4o-mini
+
+| Format | Accuracy | Correct/Total |
+|--------|----------|---------------|
+| `toon` | 72.3% | 115/159 |
+| `json` | 71.7% | 114/159 |
+| `markdown-kv` | 70.4% | 112/159 |
+| `csv` | 69.2% | 110/159 |
+| `yaml` | 68.6% | 109/159 |
+
+##### claude-haiku-4-5
+
+| Format | Accuracy | Correct/Total |
+|--------|----------|---------------|
+| `markdown-kv` | 76.7% | 122/159 |
+| `toon` | 75.5% | 120/159 |
+| `json` | 75.5% | 120/159 |
+| `csv` | 75.5% | 120/159 |
+| `yaml` | 74.8% | 119/159 |
+
+
+#### Methodology
+
+- **Semantic validation**: LLM-as-judge validates responses semantically (not exact string matching)
+- **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding
+- **Question types**: Field retrieval, aggregation, and filtering tasks
+- **Datasets**: faker.js-generated synthetic datasets plus real GitHub repository data
+
+
diff --git a/benchmarks/results/accuracy/raw-results.json b/benchmarks/results/accuracy/raw-results.json
new file mode 100644
index 0000000..a5a21a0
--- /dev/null
+++ b/benchmarks/results/accuracy/raw-results.json
@@ -0,0 +1,17492 @@
+[
+ {
+ "questionId": "q1",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 3,
+ "latencyMs": 1313
+ },
+ {
+ "questionId": "q1",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 7870,
+ "outputTokens": 6,
+ "latencyMs": 1346
+ },
+ {
+ "questionId": "q1",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 3,
+ "latencyMs": 1191
+ },
+ {
+ "questionId": "q1",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 2982,
+ "outputTokens": 6,
+ "latencyMs": 1399
+ },
+ {
+ "questionId": "q1",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 3,
+ "latencyMs": 5010
+ },
+ {
+ "questionId": "q1",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 2856,
+ "outputTokens": 6,
+ "latencyMs": 1472
+ },
+ {
+ "questionId": "q1",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 3,
+ "latencyMs": 1667
+ },
+ {
+ "questionId": "q1",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 6365,
+ "outputTokens": 6,
+ "latencyMs": 1507
+ },
+ {
+ "questionId": "q1",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 3,
+ "latencyMs": 1325
+ },
+ {
+ "questionId": "q1",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "56176",
+ "actual": "56176",
+ "correct": true,
+ "inputTokens": 5760,
+ "outputTokens": 6,
+ "latencyMs": 2280
+ },
+ {
+ "questionId": "q2",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 2,
+ "latencyMs": 3167
+ },
+ {
+ "questionId": "q2",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7869,
+ "outputTokens": 4,
+ "latencyMs": 1267
+ },
+ {
+ "questionId": "q2",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 2,
+ "latencyMs": 1402
+ },
+ {
+ "questionId": "q2",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2981,
+ "outputTokens": 4,
+ "latencyMs": 1290
+ },
+ {
+ "questionId": "q2",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 2,
+ "latencyMs": 5070
+ },
+ {
+ "questionId": "q2",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2855,
+ "outputTokens": 4,
+ "latencyMs": 1320
+ },
+ {
+ "questionId": "q2",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 2,
+ "latencyMs": 1745
+ },
+ {
+ "questionId": "q2",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6364,
+ "outputTokens": 4,
+ "latencyMs": 1191
+ },
+ {
+ "questionId": "q2",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 2,
+ "latencyMs": 2713
+ },
+ {
+ "questionId": "q2",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5759,
+ "outputTokens": 4,
+ "latencyMs": 1309
+ },
+ {
+ "questionId": "q3",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 6393,
+ "outputTokens": 7,
+ "latencyMs": 1160
+ },
+ {
+ "questionId": "q3",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 7874,
+ "outputTokens": 12,
+ "latencyMs": 1338
+ },
+ {
+ "questionId": "q3",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 2530,
+ "outputTokens": 7,
+ "latencyMs": 1478
+ },
+ {
+ "questionId": "q3",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 12,
+ "latencyMs": 1563
+ },
+ {
+ "questionId": "q3",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 2384,
+ "outputTokens": 7,
+ "latencyMs": 1310
+ },
+ {
+ "questionId": "q3",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 2860,
+ "outputTokens": 12,
+ "latencyMs": 1236
+ },
+ {
+ "questionId": "q3",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 6319,
+ "outputTokens": 7,
+ "latencyMs": 2236
+ },
+ {
+ "questionId": "q3",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 6369,
+ "outputTokens": 12,
+ "latencyMs": 1253
+ },
+ {
+ "questionId": "q3",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 5015,
+ "outputTokens": 7,
+ "latencyMs": 1917
+ },
+ {
+ "questionId": "q3",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "lorenza.kunze@yahoo.com",
+ "actual": "lorenza.kunze@yahoo.com",
+ "correct": true,
+ "inputTokens": 5764,
+ "outputTokens": 12,
+ "latencyMs": 1332
+ },
+ {
+ "questionId": "q4",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 3,
+ "latencyMs": 2945
+ },
+ {
+ "questionId": "q4",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 7870,
+ "outputTokens": 6,
+ "latencyMs": 1773
+ },
+ {
+ "questionId": "q4",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 3,
+ "latencyMs": 1294
+ },
+ {
+ "questionId": "q4",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 2982,
+ "outputTokens": 6,
+ "latencyMs": 980
+ },
+ {
+ "questionId": "q4",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 3,
+ "latencyMs": 1747
+ },
+ {
+ "questionId": "q4",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 2856,
+ "outputTokens": 6,
+ "latencyMs": 1197
+ },
+ {
+ "questionId": "q4",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 3,
+ "latencyMs": 1039
+ },
+ {
+ "questionId": "q4",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 6365,
+ "outputTokens": 6,
+ "latencyMs": 1453
+ },
+ {
+ "questionId": "q4",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 3,
+ "latencyMs": 1056
+ },
+ {
+ "questionId": "q4",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "117381",
+ "actual": "117381",
+ "correct": true,
+ "inputTokens": 5760,
+ "outputTokens": 6,
+ "latencyMs": 1564
+ },
+ {
+ "questionId": "q5",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 2,
+ "latencyMs": 1263
+ },
+ {
+ "questionId": "q5",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7868,
+ "outputTokens": 4,
+ "latencyMs": 1097
+ },
+ {
+ "questionId": "q5",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 2,
+ "latencyMs": 1248
+ },
+ {
+ "questionId": "q5",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2980,
+ "outputTokens": 4,
+ "latencyMs": 1486
+ },
+ {
+ "questionId": "q5",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 2,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q5",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2854,
+ "outputTokens": 4,
+ "latencyMs": 1019
+ },
+ {
+ "questionId": "q5",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6316,
+ "outputTokens": 2,
+ "latencyMs": 1287
+ },
+ {
+ "questionId": "q5",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6363,
+ "outputTokens": 4,
+ "latencyMs": 1243
+ },
+ {
+ "questionId": "q5",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 2,
+ "latencyMs": 1339
+ },
+ {
+ "questionId": "q5",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5758,
+ "outputTokens": 4,
+ "latencyMs": 1621
+ },
+ {
+ "questionId": "q6",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 6,
+ "latencyMs": 1625
+ },
+ {
+ "questionId": "q6",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 11,
+ "latencyMs": 1328
+ },
+ {
+ "questionId": "q6",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 6,
+ "latencyMs": 1463
+ },
+ {
+ "questionId": "q6",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 11,
+ "latencyMs": 1149
+ },
+ {
+ "questionId": "q6",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 6,
+ "latencyMs": 1474
+ },
+ {
+ "questionId": "q6",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 11,
+ "latencyMs": 977
+ },
+ {
+ "questionId": "q6",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 6,
+ "latencyMs": 2079
+ },
+ {
+ "questionId": "q6",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 11,
+ "latencyMs": 1134
+ },
+ {
+ "questionId": "q6",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 6,
+ "latencyMs": 1124
+ },
+ {
+ "questionId": "q6",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "jayda60@hotmail.com",
+ "actual": "jayda60@hotmail.com",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 11,
+ "latencyMs": 1053
+ },
+ {
+ "questionId": "q7",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 3,
+ "latencyMs": 1427
+ },
+ {
+ "questionId": "q7",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 7870,
+ "outputTokens": 6,
+ "latencyMs": 1246
+ },
+ {
+ "questionId": "q7",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 3,
+ "latencyMs": 1171
+ },
+ {
+ "questionId": "q7",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 2982,
+ "outputTokens": 6,
+ "latencyMs": 1547
+ },
+ {
+ "questionId": "q7",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 3,
+ "latencyMs": 1523
+ },
+ {
+ "questionId": "q7",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 2856,
+ "outputTokens": 6,
+ "latencyMs": 1148
+ },
+ {
+ "questionId": "q7",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 3,
+ "latencyMs": 1360
+ },
+ {
+ "questionId": "q7",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 6365,
+ "outputTokens": 6,
+ "latencyMs": 1100
+ },
+ {
+ "questionId": "q7",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 3,
+ "latencyMs": 1116
+ },
+ {
+ "questionId": "q7",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "92971",
+ "actual": "92971",
+ "correct": true,
+ "inputTokens": 5760,
+ "outputTokens": 6,
+ "latencyMs": 1202
+ },
+ {
+ "questionId": "q8",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Operations",
+ "correct": false,
+ "inputTokens": 6391,
+ "outputTokens": 2,
+ "latencyMs": 974
+ },
+ {
+ "questionId": "q8",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 4,
+ "latencyMs": 1357
+ },
+ {
+ "questionId": "q8",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 2,
+ "latencyMs": 1107
+ },
+ {
+ "questionId": "q8",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 4,
+ "latencyMs": 1126
+ },
+ {
+ "questionId": "q8",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 2,
+ "latencyMs": 1124
+ },
+ {
+ "questionId": "q8",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 4,
+ "latencyMs": 1208
+ },
+ {
+ "questionId": "q8",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Operations",
+ "correct": false,
+ "inputTokens": 6317,
+ "outputTokens": 2,
+ "latencyMs": 1463
+ },
+ {
+ "questionId": "q8",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 4,
+ "latencyMs": 1175
+ },
+ {
+ "questionId": "q8",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 2,
+ "latencyMs": 1952
+ },
+ {
+ "questionId": "q8",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 4,
+ "latencyMs": 1271
+ },
+ {
+ "questionId": "q9",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 6393,
+ "outputTokens": 7,
+ "latencyMs": 1301
+ },
+ {
+ "questionId": "q9",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 11,
+ "latencyMs": 1371
+ },
+ {
+ "questionId": "q9",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 2530,
+ "outputTokens": 7,
+ "latencyMs": 1197
+ },
+ {
+ "questionId": "q9",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 11,
+ "latencyMs": 1088
+ },
+ {
+ "questionId": "q9",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 2384,
+ "outputTokens": 7,
+ "latencyMs": 1310
+ },
+ {
+ "questionId": "q9",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 11,
+ "latencyMs": 1300
+ },
+ {
+ "questionId": "q9",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 6319,
+ "outputTokens": 7,
+ "latencyMs": 1531
+ },
+ {
+ "questionId": "q9",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 11,
+ "latencyMs": 1275
+ },
+ {
+ "questionId": "q9",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrence.hansen@yahoo.com",
+ "correct": false,
+ "inputTokens": 5015,
+ "outputTokens": 7,
+ "latencyMs": 1245
+ },
+ {
+ "questionId": "q9",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "terrance.hansen@yahoo.com",
+ "actual": "terrance.hansen@yahoo.com",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 11,
+ "latencyMs": 1215
+ },
+ {
+ "questionId": "q10",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 6392,
+ "outputTokens": 3,
+ "latencyMs": 4959
+ },
+ {
+ "questionId": "q10",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 7870,
+ "outputTokens": 6,
+ "latencyMs": 1269
+ },
+ {
+ "questionId": "q10",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 2529,
+ "outputTokens": 3,
+ "latencyMs": 1111
+ },
+ {
+ "questionId": "q10",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 2982,
+ "outputTokens": 6,
+ "latencyMs": 1254
+ },
+ {
+ "questionId": "q10",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 2383,
+ "outputTokens": 3,
+ "latencyMs": 1616
+ },
+ {
+ "questionId": "q10",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 2856,
+ "outputTokens": 6,
+ "latencyMs": 1123
+ },
+ {
+ "questionId": "q10",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 6318,
+ "outputTokens": 3,
+ "latencyMs": 1201
+ },
+ {
+ "questionId": "q10",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 6365,
+ "outputTokens": 6,
+ "latencyMs": 1371
+ },
+ {
+ "questionId": "q10",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 5014,
+ "outputTokens": 3,
+ "latencyMs": 1503
+ },
+ {
+ "questionId": "q10",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "107744",
+ "actual": "107744",
+ "correct": true,
+ "inputTokens": 5760,
+ "outputTokens": 6,
+ "latencyMs": 1249
+ },
+ {
+ "questionId": "q11",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 2,
+ "latencyMs": 1383
+ },
+ {
+ "questionId": "q11",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7869,
+ "outputTokens": 4,
+ "latencyMs": 1081
+ },
+ {
+ "questionId": "q11",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 2,
+ "latencyMs": 1677
+ },
+ {
+ "questionId": "q11",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2981,
+ "outputTokens": 4,
+ "latencyMs": 1072
+ },
+ {
+ "questionId": "q11",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 2,
+ "latencyMs": 1142
+ },
+ {
+ "questionId": "q11",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2855,
+ "outputTokens": 4,
+ "latencyMs": 991
+ },
+ {
+ "questionId": "q11",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 2,
+ "latencyMs": 1339
+ },
+ {
+ "questionId": "q11",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6364,
+ "outputTokens": 4,
+ "latencyMs": 1117
+ },
+ {
+ "questionId": "q11",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 2,
+ "latencyMs": 2483
+ },
+ {
+ "questionId": "q11",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5759,
+ "outputTokens": 4,
+ "latencyMs": 1187
+ },
+ {
+ "questionId": "q12",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 5,
+ "latencyMs": 1827
+ },
+ {
+ "questionId": "q12",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 7867,
+ "outputTokens": 9,
+ "latencyMs": 1121
+ },
+ {
+ "questionId": "q12",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 5,
+ "latencyMs": 1373
+ },
+ {
+ "questionId": "q12",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 2979,
+ "outputTokens": 9,
+ "latencyMs": 1284
+ },
+ {
+ "questionId": "q12",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 5,
+ "latencyMs": 1751
+ },
+ {
+ "questionId": "q12",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 2853,
+ "outputTokens": 9,
+ "latencyMs": 1140
+ },
+ {
+ "questionId": "q12",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 6316,
+ "outputTokens": 5,
+ "latencyMs": 1624
+ },
+ {
+ "questionId": "q12",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 6362,
+ "outputTokens": 9,
+ "latencyMs": 1071
+ },
+ {
+ "questionId": "q12",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 5,
+ "latencyMs": 1970
+ },
+ {
+ "questionId": "q12",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "allan21@gmail.com",
+ "actual": "allan21@gmail.com",
+ "correct": true,
+ "inputTokens": 5757,
+ "outputTokens": 9,
+ "latencyMs": 1437
+ },
+ {
+ "questionId": "q13",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 6389,
+ "outputTokens": 3,
+ "latencyMs": 1263
+ },
+ {
+ "questionId": "q13",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 7868,
+ "outputTokens": 6,
+ "latencyMs": 1277
+ },
+ {
+ "questionId": "q13",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 2526,
+ "outputTokens": 3,
+ "latencyMs": 1151
+ },
+ {
+ "questionId": "q13",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 2980,
+ "outputTokens": 6,
+ "latencyMs": 1260
+ },
+ {
+ "questionId": "q13",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 2380,
+ "outputTokens": 3,
+ "latencyMs": 1071
+ },
+ {
+ "questionId": "q13",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 2854,
+ "outputTokens": 6,
+ "latencyMs": 891
+ },
+ {
+ "questionId": "q13",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 6315,
+ "outputTokens": 3,
+ "latencyMs": 1548
+ },
+ {
+ "questionId": "q13",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 6363,
+ "outputTokens": 6,
+ "latencyMs": 1456
+ },
+ {
+ "questionId": "q13",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 5011,
+ "outputTokens": 3,
+ "latencyMs": 1268
+ },
+ {
+ "questionId": "q13",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "145843",
+ "actual": "145843",
+ "correct": true,
+ "inputTokens": 5758,
+ "outputTokens": 6,
+ "latencyMs": 1205
+ },
+ {
+ "questionId": "q14",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 2,
+ "latencyMs": 1310
+ },
+ {
+ "questionId": "q14",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7868,
+ "outputTokens": 4,
+ "latencyMs": 1071
+ },
+ {
+ "questionId": "q14",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 2,
+ "latencyMs": 895
+ },
+ {
+ "questionId": "q14",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2980,
+ "outputTokens": 4,
+ "latencyMs": 1020
+ },
+ {
+ "questionId": "q14",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 2,
+ "latencyMs": 1168
+ },
+ {
+ "questionId": "q14",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2854,
+ "outputTokens": 4,
+ "latencyMs": 977
+ },
+ {
+ "questionId": "q14",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Operations",
+ "correct": false,
+ "inputTokens": 6316,
+ "outputTokens": 2,
+ "latencyMs": 1370
+ },
+ {
+ "questionId": "q14",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6363,
+ "outputTokens": 4,
+ "latencyMs": 1508
+ },
+ {
+ "questionId": "q14",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 2,
+ "latencyMs": 3622
+ },
+ {
+ "questionId": "q14",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5758,
+ "outputTokens": 4,
+ "latencyMs": 1249
+ },
+ {
+ "questionId": "q15",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 7,
+ "latencyMs": 3269
+ },
+ {
+ "questionId": "q15",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 7869,
+ "outputTokens": 9,
+ "latencyMs": 1538
+ },
+ {
+ "questionId": "q15",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 7,
+ "latencyMs": 1413
+ },
+ {
+ "questionId": "q15",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 2981,
+ "outputTokens": 9,
+ "latencyMs": 1027
+ },
+ {
+ "questionId": "q15",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 7,
+ "latencyMs": 1257
+ },
+ {
+ "questionId": "q15",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 2855,
+ "outputTokens": 9,
+ "latencyMs": 1169
+ },
+ {
+ "questionId": "q15",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 7,
+ "latencyMs": 1464
+ },
+ {
+ "questionId": "q15",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 6364,
+ "outputTokens": 9,
+ "latencyMs": 1799
+ },
+ {
+ "questionId": "q15",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 7,
+ "latencyMs": 1616
+ },
+ {
+ "questionId": "q15",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "alexandria61@gmail.com",
+ "actual": "alexandria61@gmail.com",
+ "correct": true,
+ "inputTokens": 5759,
+ "outputTokens": 9,
+ "latencyMs": 1349
+ },
+ {
+ "questionId": "q16",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 3,
+ "latencyMs": 1298
+ },
+ {
+ "questionId": "q16",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 7870,
+ "outputTokens": 6,
+ "latencyMs": 1115
+ },
+ {
+ "questionId": "q16",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 3,
+ "latencyMs": 1180
+ },
+ {
+ "questionId": "q16",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 2982,
+ "outputTokens": 6,
+ "latencyMs": 1110
+ },
+ {
+ "questionId": "q16",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 3,
+ "latencyMs": 1235
+ },
+ {
+ "questionId": "q16",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 2856,
+ "outputTokens": 6,
+ "latencyMs": 1228
+ },
+ {
+ "questionId": "q16",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 6316,
+ "outputTokens": 3,
+ "latencyMs": 1832
+ },
+ {
+ "questionId": "q16",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 6365,
+ "outputTokens": 6,
+ "latencyMs": 1401
+ },
+ {
+ "questionId": "q16",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 3,
+ "latencyMs": 933
+ },
+ {
+ "questionId": "q16",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "89436",
+ "actual": "89436",
+ "correct": true,
+ "inputTokens": 5760,
+ "outputTokens": 6,
+ "latencyMs": 1570
+ },
+ {
+ "questionId": "q17",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6393,
+ "outputTokens": 2,
+ "latencyMs": 1221
+ },
+ {
+ "questionId": "q17",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7872,
+ "outputTokens": 4,
+ "latencyMs": 1293
+ },
+ {
+ "questionId": "q17",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2530,
+ "outputTokens": 2,
+ "latencyMs": 1147
+ },
+ {
+ "questionId": "q17",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2984,
+ "outputTokens": 4,
+ "latencyMs": 923
+ },
+ {
+ "questionId": "q17",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2384,
+ "outputTokens": 2,
+ "latencyMs": 1180
+ },
+ {
+ "questionId": "q17",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2858,
+ "outputTokens": 4,
+ "latencyMs": 1025
+ },
+ {
+ "questionId": "q17",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6319,
+ "outputTokens": 2,
+ "latencyMs": 1748
+ },
+ {
+ "questionId": "q17",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6367,
+ "outputTokens": 4,
+ "latencyMs": 1188
+ },
+ {
+ "questionId": "q17",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5015,
+ "outputTokens": 2,
+ "latencyMs": 1452
+ },
+ {
+ "questionId": "q17",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5762,
+ "outputTokens": 4,
+ "latencyMs": 1329
+ },
+ {
+ "questionId": "q18",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 6,
+ "latencyMs": 768
+ },
+ {
+ "questionId": "q18",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 10,
+ "latencyMs": 1150
+ },
+ {
+ "questionId": "q18",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 6,
+ "latencyMs": 1501
+ },
+ {
+ "questionId": "q18",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 10,
+ "latencyMs": 1201
+ },
+ {
+ "questionId": "q18",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 6,
+ "latencyMs": 1604
+ },
+ {
+ "questionId": "q18",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 10,
+ "latencyMs": 1060
+ },
+ {
+ "questionId": "q18",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 6,
+ "latencyMs": 1350
+ },
+ {
+ "questionId": "q18",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 10,
+ "latencyMs": 1154
+ },
+ {
+ "questionId": "q18",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 6,
+ "latencyMs": 1199
+ },
+ {
+ "questionId": "q18",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "kelvin54@yahoo.com",
+ "actual": "kelvin54@yahoo.com",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 10,
+ "latencyMs": 1216
+ },
+ {
+ "questionId": "q19",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 3,
+ "latencyMs": 1412
+ },
+ {
+ "questionId": "q19",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 7872,
+ "outputTokens": 6,
+ "latencyMs": 1908
+ },
+ {
+ "questionId": "q19",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 3,
+ "latencyMs": 1366
+ },
+ {
+ "questionId": "q19",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 2984,
+ "outputTokens": 6,
+ "latencyMs": 1054
+ },
+ {
+ "questionId": "q19",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 3,
+ "latencyMs": 1121
+ },
+ {
+ "questionId": "q19",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 2858,
+ "outputTokens": 6,
+ "latencyMs": 1262
+ },
+ {
+ "questionId": "q19",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 3,
+ "latencyMs": 4632
+ },
+ {
+ "questionId": "q19",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 6367,
+ "outputTokens": 6,
+ "latencyMs": 1118
+ },
+ {
+ "questionId": "q19",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 3,
+ "latencyMs": 928
+ },
+ {
+ "questionId": "q19",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "143365",
+ "actual": "143365",
+ "correct": true,
+ "inputTokens": 5762,
+ "outputTokens": 6,
+ "latencyMs": 1191
+ },
+ {
+ "questionId": "q20",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 2,
+ "latencyMs": 1053
+ },
+ {
+ "questionId": "q20",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7868,
+ "outputTokens": 4,
+ "latencyMs": 1096
+ },
+ {
+ "questionId": "q20",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 2,
+ "latencyMs": 1784
+ },
+ {
+ "questionId": "q20",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2980,
+ "outputTokens": 4,
+ "latencyMs": 1093
+ },
+ {
+ "questionId": "q20",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 2,
+ "latencyMs": 1335
+ },
+ {
+ "questionId": "q20",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2854,
+ "outputTokens": 4,
+ "latencyMs": 1546
+ },
+ {
+ "questionId": "q20",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6316,
+ "outputTokens": 2,
+ "latencyMs": 1293
+ },
+ {
+ "questionId": "q20",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6363,
+ "outputTokens": 4,
+ "latencyMs": 1230
+ },
+ {
+ "questionId": "q20",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 2,
+ "latencyMs": 1467
+ },
+ {
+ "questionId": "q20",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5758,
+ "outputTokens": 4,
+ "latencyMs": 1370
+ },
+ {
+ "questionId": "q21",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 6394,
+ "outputTokens": 6,
+ "latencyMs": 5026
+ },
+ {
+ "questionId": "q21",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 7876,
+ "outputTokens": 9,
+ "latencyMs": 1786
+ },
+ {
+ "questionId": "q21",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 2531,
+ "outputTokens": 6,
+ "latencyMs": 826
+ },
+ {
+ "questionId": "q21",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 2988,
+ "outputTokens": 9,
+ "latencyMs": 909
+ },
+ {
+ "questionId": "q21",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 2385,
+ "outputTokens": 6,
+ "latencyMs": 1120
+ },
+ {
+ "questionId": "q21",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 2862,
+ "outputTokens": 9,
+ "latencyMs": 996
+ },
+ {
+ "questionId": "q21",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 6320,
+ "outputTokens": 6,
+ "latencyMs": 1639
+ },
+ {
+ "questionId": "q21",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 6371,
+ "outputTokens": 9,
+ "latencyMs": 1299
+ },
+ {
+ "questionId": "q21",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 5016,
+ "outputTokens": 6,
+ "latencyMs": 1151
+ },
+ {
+ "questionId": "q21",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "dean19@gmail.com",
+ "actual": "dean19@gmail.com",
+ "correct": true,
+ "inputTokens": 5766,
+ "outputTokens": 9,
+ "latencyMs": 1246
+ },
+ {
+ "questionId": "q22",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 6392,
+ "outputTokens": 3,
+ "latencyMs": 1838
+ },
+ {
+ "questionId": "q22",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 6,
+ "latencyMs": 1191
+ },
+ {
+ "questionId": "q22",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 2529,
+ "outputTokens": 3,
+ "latencyMs": 980
+ },
+ {
+ "questionId": "q22",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 6,
+ "latencyMs": 1299
+ },
+ {
+ "questionId": "q22",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 2383,
+ "outputTokens": 3,
+ "latencyMs": 1027
+ },
+ {
+ "questionId": "q22",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 6,
+ "latencyMs": 1433
+ },
+ {
+ "questionId": "q22",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 6318,
+ "outputTokens": 3,
+ "latencyMs": 2256
+ },
+ {
+ "questionId": "q22",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 6,
+ "latencyMs": 1091
+ },
+ {
+ "questionId": "q22",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 5014,
+ "outputTokens": 3,
+ "latencyMs": 1288
+ },
+ {
+ "questionId": "q22",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "111314",
+ "actual": "111314",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 6,
+ "latencyMs": 1306
+ },
+ {
+ "questionId": "q23",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6389,
+ "outputTokens": 2,
+ "latencyMs": 1951
+ },
+ {
+ "questionId": "q23",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7868,
+ "outputTokens": 4,
+ "latencyMs": 1440
+ },
+ {
+ "questionId": "q23",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2526,
+ "outputTokens": 2,
+ "latencyMs": 978
+ },
+ {
+ "questionId": "q23",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2980,
+ "outputTokens": 4,
+ "latencyMs": 1385
+ },
+ {
+ "questionId": "q23",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2380,
+ "outputTokens": 2,
+ "latencyMs": 2311
+ },
+ {
+ "questionId": "q23",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2854,
+ "outputTokens": 4,
+ "latencyMs": 1066
+ },
+ {
+ "questionId": "q23",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6315,
+ "outputTokens": 2,
+ "latencyMs": 1914
+ },
+ {
+ "questionId": "q23",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6363,
+ "outputTokens": 4,
+ "latencyMs": 1596
+ },
+ {
+ "questionId": "q23",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5011,
+ "outputTokens": 2,
+ "latencyMs": 1820
+ },
+ {
+ "questionId": "q23",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5758,
+ "outputTokens": 4,
+ "latencyMs": 1067
+ },
+ {
+ "questionId": "q24",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 6,
+ "latencyMs": 2594
+ },
+ {
+ "questionId": "q24",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 7869,
+ "outputTokens": 10,
+ "latencyMs": 1139
+ },
+ {
+ "questionId": "q24",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 6,
+ "latencyMs": 1225
+ },
+ {
+ "questionId": "q24",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 2981,
+ "outputTokens": 10,
+ "latencyMs": 1082
+ },
+ {
+ "questionId": "q24",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 6,
+ "latencyMs": 4857
+ },
+ {
+ "questionId": "q24",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 2855,
+ "outputTokens": 10,
+ "latencyMs": 1082
+ },
+ {
+ "questionId": "q24",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 6,
+ "latencyMs": 1272
+ },
+ {
+ "questionId": "q24",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 6364,
+ "outputTokens": 10,
+ "latencyMs": 1201
+ },
+ {
+ "questionId": "q24",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 6,
+ "latencyMs": 1197
+ },
+ {
+ "questionId": "q24",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "laurel54@yahoo.com",
+ "actual": "laurel54@yahoo.com",
+ "correct": true,
+ "inputTokens": 5759,
+ "outputTokens": 10,
+ "latencyMs": 1198
+ },
+ {
+ "questionId": "q25",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 6392,
+ "outputTokens": 3,
+ "latencyMs": 1085
+ },
+ {
+ "questionId": "q25",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 7873,
+ "outputTokens": 6,
+ "latencyMs": 1102
+ },
+ {
+ "questionId": "q25",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 2529,
+ "outputTokens": 3,
+ "latencyMs": 1350
+ },
+ {
+ "questionId": "q25",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 6,
+ "latencyMs": 1300
+ },
+ {
+ "questionId": "q25",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 2383,
+ "outputTokens": 3,
+ "latencyMs": 998
+ },
+ {
+ "questionId": "q25",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 2859,
+ "outputTokens": 6,
+ "latencyMs": 972
+ },
+ {
+ "questionId": "q25",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 6318,
+ "outputTokens": 3,
+ "latencyMs": 1331
+ },
+ {
+ "questionId": "q25",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 6368,
+ "outputTokens": 6,
+ "latencyMs": 1027
+ },
+ {
+ "questionId": "q25",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 5014,
+ "outputTokens": 3,
+ "latencyMs": 1170
+ },
+ {
+ "questionId": "q25",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "89553",
+ "actual": "89553",
+ "correct": true,
+ "inputTokens": 5763,
+ "outputTokens": 6,
+ "latencyMs": 1074
+ },
+ {
+ "questionId": "q26",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6389,
+ "outputTokens": 2,
+ "latencyMs": 1862
+ },
+ {
+ "questionId": "q26",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7866,
+ "outputTokens": 4,
+ "latencyMs": 1435
+ },
+ {
+ "questionId": "q26",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2526,
+ "outputTokens": 2,
+ "latencyMs": 989
+ },
+ {
+ "questionId": "q26",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2978,
+ "outputTokens": 4,
+ "latencyMs": 1035
+ },
+ {
+ "questionId": "q26",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2380,
+ "outputTokens": 2,
+ "latencyMs": 2157
+ },
+ {
+ "questionId": "q26",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2852,
+ "outputTokens": 4,
+ "latencyMs": 1094
+ },
+ {
+ "questionId": "q26",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6315,
+ "outputTokens": 2,
+ "latencyMs": 1912
+ },
+ {
+ "questionId": "q26",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6361,
+ "outputTokens": 4,
+ "latencyMs": 1364
+ },
+ {
+ "questionId": "q26",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5011,
+ "outputTokens": 2,
+ "latencyMs": 1435
+ },
+ {
+ "questionId": "q26",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5756,
+ "outputTokens": 4,
+ "latencyMs": 1082
+ },
+ {
+ "questionId": "q27",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 6392,
+ "outputTokens": 9,
+ "latencyMs": 1274
+ },
+ {
+ "questionId": "q27",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 14,
+ "latencyMs": 1130
+ },
+ {
+ "questionId": "q27",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 2529,
+ "outputTokens": 9,
+ "latencyMs": 1795
+ },
+ {
+ "questionId": "q27",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 14,
+ "latencyMs": 1309
+ },
+ {
+ "questionId": "q27",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 2383,
+ "outputTokens": 9,
+ "latencyMs": 1406
+ },
+ {
+ "questionId": "q27",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 14,
+ "latencyMs": 1398
+ },
+ {
+ "questionId": "q27",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 6318,
+ "outputTokens": 9,
+ "latencyMs": 1114
+ },
+ {
+ "questionId": "q27",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 14,
+ "latencyMs": 1251
+ },
+ {
+ "questionId": "q27",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 5014,
+ "outputTokens": 9,
+ "latencyMs": 1941
+ },
+ {
+ "questionId": "q27",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "jayme.kertzmann77@gmail.com",
+ "actual": "jayme.kertzmann77@gmail.com",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 14,
+ "latencyMs": 1218
+ },
+ {
+ "questionId": "q28",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 3,
+ "latencyMs": 1395
+ },
+ {
+ "questionId": "q28",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 6,
+ "latencyMs": 1342
+ },
+ {
+ "questionId": "q28",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 3,
+ "latencyMs": 919
+ },
+ {
+ "questionId": "q28",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 6,
+ "latencyMs": 1187
+ },
+ {
+ "questionId": "q28",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 3,
+ "latencyMs": 1131
+ },
+ {
+ "questionId": "q28",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 6,
+ "latencyMs": 1191
+ },
+ {
+ "questionId": "q28",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 3,
+ "latencyMs": 1435
+ },
+ {
+ "questionId": "q28",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 6,
+ "latencyMs": 1095
+ },
+ {
+ "questionId": "q28",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 3,
+ "latencyMs": 4588
+ },
+ {
+ "questionId": "q28",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "104053",
+ "actual": "104053",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 6,
+ "latencyMs": 1291
+ },
+ {
+ "questionId": "q29",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6392,
+ "outputTokens": 2,
+ "latencyMs": 1688
+ },
+ {
+ "questionId": "q29",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7872,
+ "outputTokens": 4,
+ "latencyMs": 1301
+ },
+ {
+ "questionId": "q29",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2529,
+ "outputTokens": 2,
+ "latencyMs": 1914
+ },
+ {
+ "questionId": "q29",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2984,
+ "outputTokens": 4,
+ "latencyMs": 1447
+ },
+ {
+ "questionId": "q29",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2383,
+ "outputTokens": 2,
+ "latencyMs": 1725
+ },
+ {
+ "questionId": "q29",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2858,
+ "outputTokens": 4,
+ "latencyMs": 923
+ },
+ {
+ "questionId": "q29",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6318,
+ "outputTokens": 2,
+ "latencyMs": 879
+ },
+ {
+ "questionId": "q29",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6367,
+ "outputTokens": 4,
+ "latencyMs": 1322
+ },
+ {
+ "questionId": "q29",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5014,
+ "outputTokens": 2,
+ "latencyMs": 1394
+ },
+ {
+ "questionId": "q29",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5762,
+ "outputTokens": 4,
+ "latencyMs": 1008
+ },
+ {
+ "questionId": "q30",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 7,
+ "latencyMs": 894
+ },
+ {
+ "questionId": "q30",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 7869,
+ "outputTokens": 12,
+ "latencyMs": 1220
+ },
+ {
+ "questionId": "q30",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 7,
+ "latencyMs": 2225
+ },
+ {
+ "questionId": "q30",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 2981,
+ "outputTokens": 12,
+ "latencyMs": 1282
+ },
+ {
+ "questionId": "q30",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 7,
+ "latencyMs": 1414
+ },
+ {
+ "questionId": "q30",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 2855,
+ "outputTokens": 12,
+ "latencyMs": 1686
+ },
+ {
+ "questionId": "q30",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 7,
+ "latencyMs": 1113
+ },
+ {
+ "questionId": "q30",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 6364,
+ "outputTokens": 12,
+ "latencyMs": 1089
+ },
+ {
+ "questionId": "q30",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 7,
+ "latencyMs": 949
+ },
+ {
+ "questionId": "q30",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "carley.bauch@yahoo.com",
+ "actual": "carley.bauch@yahoo.com",
+ "correct": true,
+ "inputTokens": 5759,
+ "outputTokens": 12,
+ "latencyMs": 1273
+ },
+ {
+ "questionId": "q31",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 6394,
+ "outputTokens": 3,
+ "latencyMs": 4741
+ },
+ {
+ "questionId": "q31",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 7874,
+ "outputTokens": 6,
+ "latencyMs": 1132
+ },
+ {
+ "questionId": "q31",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 2531,
+ "outputTokens": 3,
+ "latencyMs": 1184
+ },
+ {
+ "questionId": "q31",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 6,
+ "latencyMs": 1137
+ },
+ {
+ "questionId": "q31",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 2385,
+ "outputTokens": 3,
+ "latencyMs": 963
+ },
+ {
+ "questionId": "q31",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 2860,
+ "outputTokens": 6,
+ "latencyMs": 1096
+ },
+ {
+ "questionId": "q31",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 6320,
+ "outputTokens": 3,
+ "latencyMs": 1399
+ },
+ {
+ "questionId": "q31",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 6369,
+ "outputTokens": 6,
+ "latencyMs": 1594
+ },
+ {
+ "questionId": "q31",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 5016,
+ "outputTokens": 3,
+ "latencyMs": 1900
+ },
+ {
+ "questionId": "q31",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "142029",
+ "actual": "142029",
+ "correct": true,
+ "inputTokens": 5764,
+ "outputTokens": 6,
+ "latencyMs": 1274
+ },
+ {
+ "questionId": "q32",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Sales",
+ "correct": false,
+ "inputTokens": 6390,
+ "outputTokens": 2,
+ "latencyMs": 5224
+ },
+ {
+ "questionId": "q32",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7869,
+ "outputTokens": 4,
+ "latencyMs": 1038
+ },
+ {
+ "questionId": "q32",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 2,
+ "latencyMs": 1902
+ },
+ {
+ "questionId": "q32",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2981,
+ "outputTokens": 4,
+ "latencyMs": 1010
+ },
+ {
+ "questionId": "q32",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 2,
+ "latencyMs": 3263
+ },
+ {
+ "questionId": "q32",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2855,
+ "outputTokens": 4,
+ "latencyMs": 871
+ },
+ {
+ "questionId": "q32",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Sales",
+ "correct": false,
+ "inputTokens": 6316,
+ "outputTokens": 2,
+ "latencyMs": 1278
+ },
+ {
+ "questionId": "q32",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6364,
+ "outputTokens": 4,
+ "latencyMs": 1048
+ },
+ {
+ "questionId": "q32",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Sales",
+ "correct": false,
+ "inputTokens": 5012,
+ "outputTokens": 2,
+ "latencyMs": 1271
+ },
+ {
+ "questionId": "q32",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5759,
+ "outputTokens": 4,
+ "latencyMs": 1075
+ },
+ {
+ "questionId": "q33",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 6394,
+ "outputTokens": 7,
+ "latencyMs": 1139
+ },
+ {
+ "questionId": "q33",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 7872,
+ "outputTokens": 14,
+ "latencyMs": 1319
+ },
+ {
+ "questionId": "q33",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 2531,
+ "outputTokens": 7,
+ "latencyMs": 1856
+ },
+ {
+ "questionId": "q33",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 2984,
+ "outputTokens": 14,
+ "latencyMs": 1393
+ },
+ {
+ "questionId": "q33",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 2385,
+ "outputTokens": 7,
+ "latencyMs": 1766
+ },
+ {
+ "questionId": "q33",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 2858,
+ "outputTokens": 14,
+ "latencyMs": 1609
+ },
+ {
+ "questionId": "q33",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 6320,
+ "outputTokens": 7,
+ "latencyMs": 1329
+ },
+ {
+ "questionId": "q33",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 6367,
+ "outputTokens": 14,
+ "latencyMs": 1178
+ },
+ {
+ "questionId": "q33",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 5016,
+ "outputTokens": 7,
+ "latencyMs": 1890
+ },
+ {
+ "questionId": "q33",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "cheyenne_skiles@hotmail.com",
+ "actual": "cheyenne_skiles@hotmail.com",
+ "correct": true,
+ "inputTokens": 5762,
+ "outputTokens": 14,
+ "latencyMs": 1326
+ },
+ {
+ "questionId": "q34",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 6392,
+ "outputTokens": 3,
+ "latencyMs": 1898
+ },
+ {
+ "questionId": "q34",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 6,
+ "latencyMs": 1074
+ },
+ {
+ "questionId": "q34",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 2529,
+ "outputTokens": 3,
+ "latencyMs": 1382
+ },
+ {
+ "questionId": "q34",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 6,
+ "latencyMs": 1060
+ },
+ {
+ "questionId": "q34",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 2383,
+ "outputTokens": 3,
+ "latencyMs": 1286
+ },
+ {
+ "questionId": "q34",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 6,
+ "latencyMs": 1591
+ },
+ {
+ "questionId": "q34",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 6318,
+ "outputTokens": 3,
+ "latencyMs": 2158
+ },
+ {
+ "questionId": "q34",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 6,
+ "latencyMs": 1532
+ },
+ {
+ "questionId": "q34",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 5014,
+ "outputTokens": 3,
+ "latencyMs": 1381
+ },
+ {
+ "questionId": "q34",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "84650",
+ "actual": "84650",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 6,
+ "latencyMs": 2262
+ },
+ {
+ "questionId": "q35",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 2,
+ "latencyMs": 2664
+ },
+ {
+ "questionId": "q35",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 4,
+ "latencyMs": 1260
+ },
+ {
+ "questionId": "q35",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 2,
+ "latencyMs": 1563
+ },
+ {
+ "questionId": "q35",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 4,
+ "latencyMs": 1415
+ },
+ {
+ "questionId": "q35",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 2,
+ "latencyMs": 1038
+ },
+ {
+ "questionId": "q35",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2857,
+ "outputTokens": 4,
+ "latencyMs": 1021
+ },
+ {
+ "questionId": "q35",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 2,
+ "latencyMs": 4276
+ },
+ {
+ "questionId": "q35",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 4,
+ "latencyMs": 1301
+ },
+ {
+ "questionId": "q35",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 2,
+ "latencyMs": 1399
+ },
+ {
+ "questionId": "q35",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 4,
+ "latencyMs": 1197
+ },
+ {
+ "questionId": "q36",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 9,
+ "latencyMs": 1390
+ },
+ {
+ "questionId": "q36",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 7869,
+ "outputTokens": 14,
+ "latencyMs": 1482
+ },
+ {
+ "questionId": "q36",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 9,
+ "latencyMs": 1754
+ },
+ {
+ "questionId": "q36",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 2981,
+ "outputTokens": 14,
+ "latencyMs": 1100
+ },
+ {
+ "questionId": "q36",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 9,
+ "latencyMs": 1421
+ },
+ {
+ "questionId": "q36",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 2855,
+ "outputTokens": 14,
+ "latencyMs": 2173
+ },
+ {
+ "questionId": "q36",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 6316,
+ "outputTokens": 9,
+ "latencyMs": 2911
+ },
+ {
+ "questionId": "q36",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 6364,
+ "outputTokens": 14,
+ "latencyMs": 1235
+ },
+ {
+ "questionId": "q36",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 9,
+ "latencyMs": 1303
+ },
+ {
+ "questionId": "q36",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "macey.gottlieb5@yahoo.com",
+ "actual": "macey.gottlieb5@yahoo.com",
+ "correct": true,
+ "inputTokens": 5759,
+ "outputTokens": 14,
+ "latencyMs": 1148
+ },
+ {
+ "questionId": "q37",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 3,
+ "latencyMs": 1430
+ },
+ {
+ "questionId": "q37",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 7868,
+ "outputTokens": 6,
+ "latencyMs": 1089
+ },
+ {
+ "questionId": "q37",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 3,
+ "latencyMs": 1059
+ },
+ {
+ "questionId": "q37",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 2980,
+ "outputTokens": 6,
+ "latencyMs": 1057
+ },
+ {
+ "questionId": "q37",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 3,
+ "latencyMs": 1716
+ },
+ {
+ "questionId": "q37",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 2854,
+ "outputTokens": 6,
+ "latencyMs": 904
+ },
+ {
+ "questionId": "q37",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 6316,
+ "outputTokens": 3,
+ "latencyMs": 2950
+ },
+ {
+ "questionId": "q37",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 6363,
+ "outputTokens": 6,
+ "latencyMs": 1189
+ },
+ {
+ "questionId": "q37",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 3,
+ "latencyMs": 1050
+ },
+ {
+ "questionId": "q37",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "89773",
+ "actual": "89773",
+ "correct": true,
+ "inputTokens": 5758,
+ "outputTokens": 6,
+ "latencyMs": 1329
+ },
+ {
+ "questionId": "q38",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 2,
+ "latencyMs": 3410
+ },
+ {
+ "questionId": "q38",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 7868,
+ "outputTokens": 4,
+ "latencyMs": 1891
+ },
+ {
+ "questionId": "q38",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 2,
+ "latencyMs": 1010
+ },
+ {
+ "questionId": "q38",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2980,
+ "outputTokens": 4,
+ "latencyMs": 988
+ },
+ {
+ "questionId": "q38",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 2,
+ "latencyMs": 1364
+ },
+ {
+ "questionId": "q38",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 2854,
+ "outputTokens": 4,
+ "latencyMs": 1395
+ },
+ {
+ "questionId": "q38",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6316,
+ "outputTokens": 2,
+ "latencyMs": 2293
+ },
+ {
+ "questionId": "q38",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 6363,
+ "outputTokens": 4,
+ "latencyMs": 1137
+ },
+ {
+ "questionId": "q38",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 2,
+ "latencyMs": 1451
+ },
+ {
+ "questionId": "q38",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Marketing",
+ "actual": "Marketing",
+ "correct": true,
+ "inputTokens": 5758,
+ "outputTokens": 4,
+ "latencyMs": 1100
+ },
+ {
+ "questionId": "q39",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 6390,
+ "outputTokens": 10,
+ "latencyMs": 1674
+ },
+ {
+ "questionId": "q39",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 7869,
+ "outputTokens": 13,
+ "latencyMs": 1403
+ },
+ {
+ "questionId": "q39",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 2527,
+ "outputTokens": 10,
+ "latencyMs": 1413
+ },
+ {
+ "questionId": "q39",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 2981,
+ "outputTokens": 13,
+ "latencyMs": 1200
+ },
+ {
+ "questionId": "q39",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 2381,
+ "outputTokens": 10,
+ "latencyMs": 1730
+ },
+ {
+ "questionId": "q39",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 2855,
+ "outputTokens": 13,
+ "latencyMs": 1226
+ },
+ {
+ "questionId": "q39",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 6316,
+ "outputTokens": 10,
+ "latencyMs": 1251
+ },
+ {
+ "questionId": "q39",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 6364,
+ "outputTokens": 13,
+ "latencyMs": 1337
+ },
+ {
+ "questionId": "q39",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 5012,
+ "outputTokens": 10,
+ "latencyMs": 2368
+ },
+ {
+ "questionId": "q39",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "georgianna_renner@yahoo.com",
+ "actual": "georgianna_renner@yahoo.com",
+ "correct": true,
+ "inputTokens": 5759,
+ "outputTokens": 13,
+ "latencyMs": 1251
+ },
+ {
+ "questionId": "q40",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 6391,
+ "outputTokens": 3,
+ "latencyMs": 3815
+ },
+ {
+ "questionId": "q40",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 7871,
+ "outputTokens": 6,
+ "latencyMs": 1169
+ },
+ {
+ "questionId": "q40",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 2528,
+ "outputTokens": 3,
+ "latencyMs": 1070
+ },
+ {
+ "questionId": "q40",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 6,
+ "latencyMs": 1162
+ },
+ {
+ "questionId": "q40",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 2382,
+ "outputTokens": 3,
+ "latencyMs": 1115
+ },
+ {
+ "questionId": "q40",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "49741",
+ "actual": "144426",
+ "correct": false,
+ "inputTokens": 2857,
+ "outputTokens": 6,
+ "latencyMs": 1365
+ },
+ {
+ "questionId": "q40",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 6317,
+ "outputTokens": 3,
+ "latencyMs": 2004
+ },
+ {
+ "questionId": "q40",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 6366,
+ "outputTokens": 6,
+ "latencyMs": 1113
+ },
+ {
+ "questionId": "q40",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 5013,
+ "outputTokens": 3,
+ "latencyMs": 3055
+ },
+ {
+ "questionId": "q40",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "49741",
+ "actual": "49741",
+ "correct": true,
+ "inputTokens": 5761,
+ "outputTokens": 6,
+ "latencyMs": 1392
+ },
+ {
+ "questionId": "q41",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6388,
+ "outputTokens": 2,
+ "latencyMs": 3877
+ },
+ {
+ "questionId": "q41",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 7865,
+ "outputTokens": 5,
+ "latencyMs": 1128
+ },
+ {
+ "questionId": "q41",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2525,
+ "outputTokens": 2,
+ "latencyMs": 966
+ },
+ {
+ "questionId": "q41",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2977,
+ "outputTokens": 5,
+ "latencyMs": 1070
+ },
+ {
+ "questionId": "q41",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2379,
+ "outputTokens": 2,
+ "latencyMs": 2411
+ },
+ {
+ "questionId": "q41",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2851,
+ "outputTokens": 5,
+ "latencyMs": 1286
+ },
+ {
+ "questionId": "q41",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6314,
+ "outputTokens": 2,
+ "latencyMs": 2082
+ },
+ {
+ "questionId": "q41",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 6360,
+ "outputTokens": 5,
+ "latencyMs": 1107
+ },
+ {
+ "questionId": "q41",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 5010,
+ "outputTokens": 2,
+ "latencyMs": 1216
+ },
+ {
+ "questionId": "q41",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 5755,
+ "outputTokens": 5,
+ "latencyMs": 1052
+ },
+ {
+ "questionId": "q42",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6388,
+ "outputTokens": 2,
+ "latencyMs": 1572
+ },
+ {
+ "questionId": "q42",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 7865,
+ "outputTokens": 5,
+ "latencyMs": 1084
+ },
+ {
+ "questionId": "q42",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2525,
+ "outputTokens": 2,
+ "latencyMs": 1377
+ },
+ {
+ "questionId": "q42",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "14",
+ "correct": false,
+ "inputTokens": 2977,
+ "outputTokens": 5,
+ "latencyMs": 1197
+ },
+ {
+ "questionId": "q42",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2379,
+ "outputTokens": 2,
+ "latencyMs": 2705
+ },
+ {
+ "questionId": "q42",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2851,
+ "outputTokens": 5,
+ "latencyMs": 1020
+ },
+ {
+ "questionId": "q42",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6314,
+ "outputTokens": 2,
+ "latencyMs": 5345
+ },
+ {
+ "questionId": "q42",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "14",
+ "correct": false,
+ "inputTokens": 6360,
+ "outputTokens": 5,
+ "latencyMs": 1207
+ },
+ {
+ "questionId": "q42",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 5010,
+ "outputTokens": 2,
+ "latencyMs": 921
+ },
+ {
+ "questionId": "q42",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 5755,
+ "outputTokens": 5,
+ "latencyMs": 1289
+ },
+ {
+ "questionId": "q43",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6388,
+ "outputTokens": 2,
+ "latencyMs": 2423
+ },
+ {
+ "questionId": "q43",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 7865,
+ "outputTokens": 5,
+ "latencyMs": 1273
+ },
+ {
+ "questionId": "q43",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2525,
+ "outputTokens": 2,
+ "latencyMs": 975
+ },
+ {
+ "questionId": "q43",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2977,
+ "outputTokens": 5,
+ "latencyMs": 1301
+ },
+ {
+ "questionId": "q43",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2379,
+ "outputTokens": 2,
+ "latencyMs": 1423
+ },
+ {
+ "questionId": "q43",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2851,
+ "outputTokens": 5,
+ "latencyMs": 927
+ },
+ {
+ "questionId": "q43",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6314,
+ "outputTokens": 2,
+ "latencyMs": 1258
+ },
+ {
+ "questionId": "q43",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 6360,
+ "outputTokens": 5,
+ "latencyMs": 1250
+ },
+ {
+ "questionId": "q43",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 5010,
+ "outputTokens": 2,
+ "latencyMs": 872
+ },
+ {
+ "questionId": "q43",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 5755,
+ "outputTokens": 5,
+ "latencyMs": 1385
+ },
+ {
+ "questionId": "q44",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6388,
+ "outputTokens": 2,
+ "latencyMs": 1201
+ },
+ {
+ "questionId": "q44",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 7865,
+ "outputTokens": 5,
+ "latencyMs": 1149
+ },
+ {
+ "questionId": "q44",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2525,
+ "outputTokens": 2,
+ "latencyMs": 1498
+ },
+ {
+ "questionId": "q44",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2977,
+ "outputTokens": 5,
+ "latencyMs": 1149
+ },
+ {
+ "questionId": "q44",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2379,
+ "outputTokens": 2,
+ "latencyMs": 1098
+ },
+ {
+ "questionId": "q44",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2851,
+ "outputTokens": 5,
+ "latencyMs": 1121
+ },
+ {
+ "questionId": "q44",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6314,
+ "outputTokens": 2,
+ "latencyMs": 2522
+ },
+ {
+ "questionId": "q44",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "10",
+ "correct": false,
+ "inputTokens": 6360,
+ "outputTokens": 5,
+ "latencyMs": 1532
+ },
+ {
+ "questionId": "q44",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "17",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 5010,
+ "outputTokens": 2,
+ "latencyMs": 4914
+ },
+ {
+ "questionId": "q44",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "17",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 5755,
+ "outputTokens": 5,
+ "latencyMs": 1324
+ },
+ {
+ "questionId": "q45",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6388,
+ "outputTokens": 2,
+ "latencyMs": 1446
+ },
+ {
+ "questionId": "q45",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 7865,
+ "outputTokens": 5,
+ "latencyMs": 1105
+ },
+ {
+ "questionId": "q45",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2525,
+ "outputTokens": 2,
+ "latencyMs": 1297
+ },
+ {
+ "questionId": "q45",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2977,
+ "outputTokens": 5,
+ "latencyMs": 1251
+ },
+ {
+ "questionId": "q45",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2379,
+ "outputTokens": 2,
+ "latencyMs": 1561
+ },
+ {
+ "questionId": "q45",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2851,
+ "outputTokens": 5,
+ "latencyMs": 1292
+ },
+ {
+ "questionId": "q45",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6314,
+ "outputTokens": 2,
+ "latencyMs": 1127
+ },
+ {
+ "questionId": "q45",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 6360,
+ "outputTokens": 5,
+ "latencyMs": 1207
+ },
+ {
+ "questionId": "q45",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 5010,
+ "outputTokens": 2,
+ "latencyMs": 1582
+ },
+ {
+ "questionId": "q45",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 5755,
+ "outputTokens": 5,
+ "latencyMs": 1278
+ },
+ {
+ "questionId": "q46",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6388,
+ "outputTokens": 2,
+ "latencyMs": 1278
+ },
+ {
+ "questionId": "q46",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "10",
+ "correct": false,
+ "inputTokens": 7865,
+ "outputTokens": 5,
+ "latencyMs": 3084
+ },
+ {
+ "questionId": "q46",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2525,
+ "outputTokens": 2,
+ "latencyMs": 1289
+ },
+ {
+ "questionId": "q46",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2977,
+ "outputTokens": 5,
+ "latencyMs": 1591
+ },
+ {
+ "questionId": "q46",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2379,
+ "outputTokens": 2,
+ "latencyMs": 3038
+ },
+ {
+ "questionId": "q46",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 2851,
+ "outputTokens": 5,
+ "latencyMs": 1447
+ },
+ {
+ "questionId": "q46",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 6314,
+ "outputTokens": 2,
+ "latencyMs": 1224
+ },
+ {
+ "questionId": "q46",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "10",
+ "correct": false,
+ "inputTokens": 6360,
+ "outputTokens": 5,
+ "latencyMs": 1250
+ },
+ {
+ "questionId": "q46",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "16",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 5010,
+ "outputTokens": 2,
+ "latencyMs": 1364
+ },
+ {
+ "questionId": "q46",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "16",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 5755,
+ "outputTokens": 5,
+ "latencyMs": 1560
+ },
+ {
+ "questionId": "q47",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "91",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 6393,
+ "outputTokens": 2,
+ "latencyMs": 989
+ },
+ {
+ "questionId": "q47",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "91",
+ "actual": "89",
+ "correct": false,
+ "inputTokens": 7870,
+ "outputTokens": 5,
+ "latencyMs": 1358
+ },
+ {
+ "questionId": "q47",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "91",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 2530,
+ "outputTokens": 2,
+ "latencyMs": 1406
+ },
+ {
+ "questionId": "q47",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "91",
+ "actual": "85",
+ "correct": false,
+ "inputTokens": 2982,
+ "outputTokens": 5,
+ "latencyMs": 1123
+ },
+ {
+ "questionId": "q47",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "91",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 2384,
+ "outputTokens": 2,
+ "latencyMs": 4883
+ },
+ {
+ "questionId": "q47",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "91",
+ "actual": "85",
+ "correct": false,
+ "inputTokens": 2856,
+ "outputTokens": 5,
+ "latencyMs": 1402
+ },
+ {
+ "questionId": "q47",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "91",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 6319,
+ "outputTokens": 2,
+ "latencyMs": 1915
+ },
+ {
+ "questionId": "q47",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "91",
+ "actual": "89",
+ "correct": false,
+ "inputTokens": 6365,
+ "outputTokens": 5,
+ "latencyMs": 1263
+ },
+ {
+ "questionId": "q47",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "91",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 5015,
+ "outputTokens": 2,
+ "latencyMs": 1448
+ },
+ {
+ "questionId": "q47",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "91",
+ "actual": "89",
+ "correct": false,
+ "inputTokens": 5760,
+ "outputTokens": 5,
+ "latencyMs": 1243
+ },
+ {
+ "questionId": "q48",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "67",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 6393,
+ "outputTokens": 2,
+ "latencyMs": 1456
+ },
+ {
+ "questionId": "q48",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "67",
+ "actual": "57",
+ "correct": false,
+ "inputTokens": 7870,
+ "outputTokens": 5,
+ "latencyMs": 1186
+ },
+ {
+ "questionId": "q48",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "67",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 2530,
+ "outputTokens": 2,
+ "latencyMs": 1076
+ },
+ {
+ "questionId": "q48",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "67",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 2982,
+ "outputTokens": 5,
+ "latencyMs": 1168
+ },
+ {
+ "questionId": "q48",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "67",
+ "actual": "56",
+ "correct": false,
+ "inputTokens": 2384,
+ "outputTokens": 2,
+ "latencyMs": 3105
+ },
+ {
+ "questionId": "q48",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "67",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 2856,
+ "outputTokens": 5,
+ "latencyMs": 1375
+ },
+ {
+ "questionId": "q48",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "67",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 6319,
+ "outputTokens": 2,
+ "latencyMs": 1618
+ },
+ {
+ "questionId": "q48",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "67",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 6365,
+ "outputTokens": 5,
+ "latencyMs": 1454
+ },
+ {
+ "questionId": "q48",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "67",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 5015,
+ "outputTokens": 2,
+ "latencyMs": 1244
+ },
+ {
+ "questionId": "q48",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "67",
+ "actual": "57",
+ "correct": false,
+ "inputTokens": 5760,
+ "outputTokens": 5,
+ "latencyMs": 1113
+ },
+ {
+ "questionId": "q49",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "30",
+ "correct": false,
+ "inputTokens": 6393,
+ "outputTokens": 2,
+ "latencyMs": 1267
+ },
+ {
+ "questionId": "q49",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "31",
+ "correct": false,
+ "inputTokens": 7870,
+ "outputTokens": 5,
+ "latencyMs": 1227
+ },
+ {
+ "questionId": "q49",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "30",
+ "correct": false,
+ "inputTokens": 2530,
+ "outputTokens": 2,
+ "latencyMs": 1246
+ },
+ {
+ "questionId": "q49",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "27",
+ "correct": false,
+ "inputTokens": 2982,
+ "outputTokens": 5,
+ "latencyMs": 1127
+ },
+ {
+ "questionId": "q49",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 2384,
+ "outputTokens": 2,
+ "latencyMs": 1260
+ },
+ {
+ "questionId": "q49",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "31",
+ "correct": false,
+ "inputTokens": 2856,
+ "outputTokens": 5,
+ "latencyMs": 1293
+ },
+ {
+ "questionId": "q49",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 6319,
+ "outputTokens": 2,
+ "latencyMs": 1246
+ },
+ {
+ "questionId": "q49",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "27",
+ "correct": false,
+ "inputTokens": 6365,
+ "outputTokens": 5,
+ "latencyMs": 1598
+ },
+ {
+ "questionId": "q49",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 5015,
+ "outputTokens": 2,
+ "latencyMs": 1471
+ },
+ {
+ "questionId": "q49",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "31",
+ "correct": false,
+ "inputTokens": 5760,
+ "outputTokens": 5,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q50",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "26",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 6393,
+ "outputTokens": 2,
+ "latencyMs": 3950
+ },
+ {
+ "questionId": "q50",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "26",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 7870,
+ "outputTokens": 5,
+ "latencyMs": 1075
+ },
+ {
+ "questionId": "q50",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "26",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 2530,
+ "outputTokens": 2,
+ "latencyMs": 1868
+ },
+ {
+ "questionId": "q50",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "26",
+ "actual": "16",
+ "correct": false,
+ "inputTokens": 2982,
+ "outputTokens": 5,
+ "latencyMs": 1075
+ },
+ {
+ "questionId": "q50",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "26",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 2384,
+ "outputTokens": 2,
+ "latencyMs": 1973
+ },
+ {
+ "questionId": "q50",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "26",
+ "actual": "16",
+ "correct": false,
+ "inputTokens": 2856,
+ "outputTokens": 5,
+ "latencyMs": 947
+ },
+ {
+ "questionId": "q50",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "26",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 6319,
+ "outputTokens": 2,
+ "latencyMs": 1414
+ },
+ {
+ "questionId": "q50",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "26",
+ "actual": "16",
+ "correct": false,
+ "inputTokens": 6365,
+ "outputTokens": 5,
+ "latencyMs": 1221
+ },
+ {
+ "questionId": "q50",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "26",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 5015,
+ "outputTokens": 2,
+ "latencyMs": 1148
+ },
+ {
+ "questionId": "q50",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "26",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 5760,
+ "outputTokens": 5,
+ "latencyMs": 1286
+ },
+ {
+ "questionId": "q51",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "78",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 6387,
+ "outputTokens": 2,
+ "latencyMs": 2525
+ },
+ {
+ "questionId": "q51",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "78",
+ "actual": "81",
+ "correct": false,
+ "inputTokens": 7864,
+ "outputTokens": 5,
+ "latencyMs": 1613
+ },
+ {
+ "questionId": "q51",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "78",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 2524,
+ "outputTokens": 2,
+ "latencyMs": 1132
+ },
+ {
+ "questionId": "q51",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "78",
+ "actual": "78",
+ "correct": true,
+ "inputTokens": 2976,
+ "outputTokens": 5,
+ "latencyMs": 1104
+ },
+ {
+ "questionId": "q51",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "78",
+ "actual": "77",
+ "correct": false,
+ "inputTokens": 2378,
+ "outputTokens": 2,
+ "latencyMs": 1069
+ },
+ {
+ "questionId": "q51",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "78",
+ "actual": "73",
+ "correct": false,
+ "inputTokens": 2850,
+ "outputTokens": 5,
+ "latencyMs": 1113
+ },
+ {
+ "questionId": "q51",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "78",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 6313,
+ "outputTokens": 2,
+ "latencyMs": 1999
+ },
+ {
+ "questionId": "q51",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "78",
+ "actual": "78",
+ "correct": true,
+ "inputTokens": 6359,
+ "outputTokens": 5,
+ "latencyMs": 1214
+ },
+ {
+ "questionId": "q51",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "78",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 5009,
+ "outputTokens": 2,
+ "latencyMs": 1613
+ },
+ {
+ "questionId": "q51",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "78",
+ "actual": "77",
+ "correct": false,
+ "inputTokens": 5754,
+ "outputTokens": 5,
+ "latencyMs": 1012
+ },
+ {
+ "questionId": "q52",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "22",
+ "actual": "30",
+ "correct": false,
+ "inputTokens": 6387,
+ "outputTokens": 2,
+ "latencyMs": 1580
+ },
+ {
+ "questionId": "q52",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "22",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 7864,
+ "outputTokens": 5,
+ "latencyMs": 1688
+ },
+ {
+ "questionId": "q52",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "22",
+ "actual": "22",
+ "correct": true,
+ "inputTokens": 2524,
+ "outputTokens": 2,
+ "latencyMs": 1290
+ },
+ {
+ "questionId": "q52",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "22",
+ "actual": "16",
+ "correct": false,
+ "inputTokens": 2976,
+ "outputTokens": 5,
+ "latencyMs": 1121
+ },
+ {
+ "questionId": "q52",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "22",
+ "actual": "10",
+ "correct": false,
+ "inputTokens": 2378,
+ "outputTokens": 2,
+ "latencyMs": 1544
+ },
+ {
+ "questionId": "q52",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "22",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 2850,
+ "outputTokens": 5,
+ "latencyMs": 822
+ },
+ {
+ "questionId": "q52",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "22",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 6313,
+ "outputTokens": 2,
+ "latencyMs": 2718
+ },
+ {
+ "questionId": "q52",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "22",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 6359,
+ "outputTokens": 5,
+ "latencyMs": 1211
+ },
+ {
+ "questionId": "q52",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "22",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 5009,
+ "outputTokens": 2,
+ "latencyMs": 1162
+ },
+ {
+ "questionId": "q52",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "22",
+ "actual": "16",
+ "correct": false,
+ "inputTokens": 5754,
+ "outputTokens": 5,
+ "latencyMs": 1156
+ },
+ {
+ "questionId": "q53",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 6395,
+ "outputTokens": 2,
+ "latencyMs": 1089
+ },
+ {
+ "questionId": "q53",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "9",
+ "correct": false,
+ "inputTokens": 7872,
+ "outputTokens": 5,
+ "latencyMs": 1368
+ },
+ {
+ "questionId": "q53",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 2532,
+ "outputTokens": 2,
+ "latencyMs": 1850
+ },
+ {
+ "questionId": "q53",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "9",
+ "correct": false,
+ "inputTokens": 2984,
+ "outputTokens": 5,
+ "latencyMs": 914
+ },
+ {
+ "questionId": "q53",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 2386,
+ "outputTokens": 2,
+ "latencyMs": 1156
+ },
+ {
+ "questionId": "q53",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "10",
+ "correct": false,
+ "inputTokens": 2858,
+ "outputTokens": 5,
+ "latencyMs": 1118
+ },
+ {
+ "questionId": "q53",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 6321,
+ "outputTokens": 2,
+ "latencyMs": 1020
+ },
+ {
+ "questionId": "q53",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 6367,
+ "outputTokens": 5,
+ "latencyMs": 1021
+ },
+ {
+ "questionId": "q53",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 5017,
+ "outputTokens": 2,
+ "latencyMs": 1236
+ },
+ {
+ "questionId": "q53",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "10",
+ "correct": false,
+ "inputTokens": 5762,
+ "outputTokens": 5,
+ "latencyMs": 1574
+ },
+ {
+ "questionId": "q54",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 6395,
+ "outputTokens": 2,
+ "latencyMs": 1437
+ },
+ {
+ "questionId": "q54",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 7872,
+ "outputTokens": 5,
+ "latencyMs": 1091
+ },
+ {
+ "questionId": "q54",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 2532,
+ "outputTokens": 2,
+ "latencyMs": 1917
+ },
+ {
+ "questionId": "q54",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "6",
+ "correct": false,
+ "inputTokens": 2984,
+ "outputTokens": 5,
+ "latencyMs": 1095
+ },
+ {
+ "questionId": "q54",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 2386,
+ "outputTokens": 2,
+ "latencyMs": 4230
+ },
+ {
+ "questionId": "q54",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 2858,
+ "outputTokens": 5,
+ "latencyMs": 1187
+ },
+ {
+ "questionId": "q54",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 6321,
+ "outputTokens": 2,
+ "latencyMs": 1197
+ },
+ {
+ "questionId": "q54",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "6",
+ "correct": false,
+ "inputTokens": 6367,
+ "outputTokens": 5,
+ "latencyMs": 1176
+ },
+ {
+ "questionId": "q54",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 5017,
+ "outputTokens": 2,
+ "latencyMs": 1249
+ },
+ {
+ "questionId": "q54",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 5762,
+ "outputTokens": 5,
+ "latencyMs": 1383
+ },
+ {
+ "questionId": "q55",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "30",
+ "correct": false,
+ "inputTokens": 6395,
+ "outputTokens": 2,
+ "latencyMs": 1149
+ },
+ {
+ "questionId": "q55",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 7872,
+ "outputTokens": 5,
+ "latencyMs": 1072
+ },
+ {
+ "questionId": "q55",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 2532,
+ "outputTokens": 2,
+ "latencyMs": 1213
+ },
+ {
+ "questionId": "q55",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 2984,
+ "outputTokens": 5,
+ "latencyMs": 1507
+ },
+ {
+ "questionId": "q55",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 2386,
+ "outputTokens": 2,
+ "latencyMs": 1826
+ },
+ {
+ "questionId": "q55",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 2858,
+ "outputTokens": 5,
+ "latencyMs": 1162
+ },
+ {
+ "questionId": "q55",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 6321,
+ "outputTokens": 2,
+ "latencyMs": 1008
+ },
+ {
+ "questionId": "q55",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 6367,
+ "outputTokens": 5,
+ "latencyMs": 1285
+ },
+ {
+ "questionId": "q55",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 5017,
+ "outputTokens": 2,
+ "latencyMs": 1124
+ },
+ {
+ "questionId": "q55",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "9",
+ "correct": false,
+ "inputTokens": 5762,
+ "outputTokens": 5,
+ "latencyMs": 1212
+ },
+ {
+ "questionId": "q56",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 6395,
+ "outputTokens": 2,
+ "latencyMs": 1232
+ },
+ {
+ "questionId": "q56",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 7872,
+ "outputTokens": 5,
+ "latencyMs": 1792
+ },
+ {
+ "questionId": "q56",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "12",
+ "correct": true,
+ "inputTokens": 2532,
+ "outputTokens": 2,
+ "latencyMs": 1357
+ },
+ {
+ "questionId": "q56",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "6",
+ "correct": false,
+ "inputTokens": 2984,
+ "outputTokens": 5,
+ "latencyMs": 1247
+ },
+ {
+ "questionId": "q56",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 2386,
+ "outputTokens": 2,
+ "latencyMs": 1043
+ },
+ {
+ "questionId": "q56",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 2858,
+ "outputTokens": 5,
+ "latencyMs": 1065
+ },
+ {
+ "questionId": "q56",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "10",
+ "correct": false,
+ "inputTokens": 6321,
+ "outputTokens": 2,
+ "latencyMs": 1298
+ },
+ {
+ "questionId": "q56",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 6367,
+ "outputTokens": 5,
+ "latencyMs": 1767
+ },
+ {
+ "questionId": "q56",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "12",
+ "actual": "10",
+ "correct": false,
+ "inputTokens": 5017,
+ "outputTokens": 2,
+ "latencyMs": 3525
+ },
+ {
+ "questionId": "q56",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "12",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 5762,
+ "outputTokens": 5,
+ "latencyMs": 1355
+ },
+ {
+ "questionId": "q57",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "62",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 6394,
+ "outputTokens": 2,
+ "latencyMs": 1359
+ },
+ {
+ "questionId": "q57",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "62",
+ "actual": "62",
+ "correct": true,
+ "inputTokens": 7872,
+ "outputTokens": 5,
+ "latencyMs": 1447
+ },
+ {
+ "questionId": "q57",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "62",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 2531,
+ "outputTokens": 2,
+ "latencyMs": 3832
+ },
+ {
+ "questionId": "q57",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "62",
+ "actual": "62",
+ "correct": true,
+ "inputTokens": 2984,
+ "outputTokens": 5,
+ "latencyMs": 1143
+ },
+ {
+ "questionId": "q57",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "62",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 2385,
+ "outputTokens": 2,
+ "latencyMs": 1370
+ },
+ {
+ "questionId": "q57",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "62",
+ "actual": "62",
+ "correct": true,
+ "inputTokens": 2858,
+ "outputTokens": 5,
+ "latencyMs": 1042
+ },
+ {
+ "questionId": "q57",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "62",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 6320,
+ "outputTokens": 2,
+ "latencyMs": 1015
+ },
+ {
+ "questionId": "q57",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "62",
+ "actual": "62",
+ "correct": true,
+ "inputTokens": 6367,
+ "outputTokens": 5,
+ "latencyMs": 1395
+ },
+ {
+ "questionId": "q57",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "62",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 5016,
+ "outputTokens": 2,
+ "latencyMs": 1008
+ },
+ {
+ "questionId": "q57",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "62",
+ "actual": "62",
+ "correct": true,
+ "inputTokens": 5762,
+ "outputTokens": 5,
+ "latencyMs": 1191
+ },
+ {
+ "questionId": "q58",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "45",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 6394,
+ "outputTokens": 2,
+ "latencyMs": 1304
+ },
+ {
+ "questionId": "q58",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "45",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 7872,
+ "outputTokens": 5,
+ "latencyMs": 1386
+ },
+ {
+ "questionId": "q58",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "45",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 2531,
+ "outputTokens": 2,
+ "latencyMs": 1433
+ },
+ {
+ "questionId": "q58",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "45",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 2984,
+ "outputTokens": 5,
+ "latencyMs": 967
+ },
+ {
+ "questionId": "q58",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "45",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 2385,
+ "outputTokens": 2,
+ "latencyMs": 2469
+ },
+ {
+ "questionId": "q58",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "45",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 2858,
+ "outputTokens": 5,
+ "latencyMs": 1382
+ },
+ {
+ "questionId": "q58",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "45",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 6320,
+ "outputTokens": 2,
+ "latencyMs": 1658
+ },
+ {
+ "questionId": "q58",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "45",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 6367,
+ "outputTokens": 5,
+ "latencyMs": 1450
+ },
+ {
+ "questionId": "q58",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "45",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 5016,
+ "outputTokens": 2,
+ "latencyMs": 1428
+ },
+ {
+ "questionId": "q58",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "45",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 5762,
+ "outputTokens": 5,
+ "latencyMs": 1144
+ },
+ {
+ "questionId": "q59",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 1577
+ },
+ {
+ "questionId": "q59",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1181
+ },
+ {
+ "questionId": "q59",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 1231
+ },
+ {
+ "questionId": "q59",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1407
+ },
+ {
+ "questionId": "q59",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1393
+ },
+ {
+ "questionId": "q59",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1534
+ },
+ {
+ "questionId": "q59",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 1456
+ },
+ {
+ "questionId": "q59",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1933
+ },
+ {
+ "questionId": "q59",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 1472
+ },
+ {
+ "questionId": "q59",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "96.17",
+ "actual": "96.17",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1224
+ },
+ {
+ "questionId": "q60",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 3,
+ "latencyMs": 2069
+ },
+ {
+ "questionId": "q60",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1172
+ },
+ {
+ "questionId": "q60",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 3,
+ "latencyMs": 1236
+ },
+ {
+ "questionId": "q60",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1157
+ },
+ {
+ "questionId": "q60",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 3,
+ "latencyMs": 1364
+ },
+ {
+ "questionId": "q60",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1041
+ },
+ {
+ "questionId": "q60",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 3,
+ "latencyMs": 1478
+ },
+ {
+ "questionId": "q60",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1266
+ },
+ {
+ "questionId": "q60",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 3,
+ "latencyMs": 3477
+ },
+ {
+ "questionId": "q60",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 2630
+ },
+ {
+ "questionId": "q61",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 1479
+ },
+ {
+ "questionId": "q61",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1270
+ },
+ {
+ "questionId": "q61",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 1270
+ },
+ {
+ "questionId": "q61",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1342
+ },
+ {
+ "questionId": "q61",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1350
+ },
+ {
+ "questionId": "q61",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1205
+ },
+ {
+ "questionId": "q61",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 1502
+ },
+ {
+ "questionId": "q61",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1571
+ },
+ {
+ "questionId": "q61",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 2013
+ },
+ {
+ "questionId": "q61",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "599.39",
+ "actual": "599.39",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1428
+ },
+ {
+ "questionId": "q62",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 2,
+ "latencyMs": 1666
+ },
+ {
+ "questionId": "q62",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1549
+ },
+ {
+ "questionId": "q62",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 2,
+ "latencyMs": 1033
+ },
+ {
+ "questionId": "q62",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1061
+ },
+ {
+ "questionId": "q62",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 2,
+ "latencyMs": 2008
+ },
+ {
+ "questionId": "q62",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1214
+ },
+ {
+ "questionId": "q62",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 2,
+ "latencyMs": 1321
+ },
+ {
+ "questionId": "q62",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q62",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 2,
+ "latencyMs": 1769
+ },
+ {
+ "questionId": "q62",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1157
+ },
+ {
+ "questionId": "q63",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 1213
+ },
+ {
+ "questionId": "q63",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1332
+ },
+ {
+ "questionId": "q63",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 3749
+ },
+ {
+ "questionId": "q63",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1326
+ },
+ {
+ "questionId": "q63",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 947
+ },
+ {
+ "questionId": "q63",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1251
+ },
+ {
+ "questionId": "q63",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 1428
+ },
+ {
+ "questionId": "q63",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1659
+ },
+ {
+ "questionId": "q63",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 5584
+ },
+ {
+ "questionId": "q63",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "528.71",
+ "actual": "528.71",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1251
+ },
+ {
+ "questionId": "q64",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 2,
+ "latencyMs": 2425
+ },
+ {
+ "questionId": "q64",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1481
+ },
+ {
+ "questionId": "q64",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 2,
+ "latencyMs": 1109
+ },
+ {
+ "questionId": "q64",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1048
+ },
+ {
+ "questionId": "q64",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 2,
+ "latencyMs": 1256
+ },
+ {
+ "questionId": "q64",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1117
+ },
+ {
+ "questionId": "q64",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 2,
+ "latencyMs": 1168
+ },
+ {
+ "questionId": "q64",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1504
+ },
+ {
+ "questionId": "q64",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 2,
+ "latencyMs": 1134
+ },
+ {
+ "questionId": "q64",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1059
+ },
+ {
+ "questionId": "q65",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 5,
+ "latencyMs": 2361
+ },
+ {
+ "questionId": "q65",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 8,
+ "latencyMs": 1158
+ },
+ {
+ "questionId": "q65",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 5,
+ "latencyMs": 1493
+ },
+ {
+ "questionId": "q65",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 8,
+ "latencyMs": 1068
+ },
+ {
+ "questionId": "q65",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 5,
+ "latencyMs": 1490
+ },
+ {
+ "questionId": "q65",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 8,
+ "latencyMs": 1386
+ },
+ {
+ "questionId": "q65",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 5,
+ "latencyMs": 1470
+ },
+ {
+ "questionId": "q65",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 8,
+ "latencyMs": 1189
+ },
+ {
+ "questionId": "q65",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 5,
+ "latencyMs": 2824
+ },
+ {
+ "questionId": "q65",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "1687.82",
+ "actual": "1687.82",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 8,
+ "latencyMs": 1565
+ },
+ {
+ "questionId": "q66",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 3,
+ "latencyMs": 1480
+ },
+ {
+ "questionId": "q66",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1354
+ },
+ {
+ "questionId": "q66",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 3,
+ "latencyMs": 5334
+ },
+ {
+ "questionId": "q66",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1158
+ },
+ {
+ "questionId": "q66",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 3,
+ "latencyMs": 2043
+ },
+ {
+ "questionId": "q66",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1302
+ },
+ {
+ "questionId": "q66",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 3,
+ "latencyMs": 1006
+ },
+ {
+ "questionId": "q66",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1106
+ },
+ {
+ "questionId": "q66",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 3,
+ "latencyMs": 1801
+ },
+ {
+ "questionId": "q66",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1626
+ },
+ {
+ "questionId": "q67",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 2107
+ },
+ {
+ "questionId": "q67",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1183
+ },
+ {
+ "questionId": "q67",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 7091
+ },
+ {
+ "questionId": "q67",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1730
+ },
+ {
+ "questionId": "q67",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1222
+ },
+ {
+ "questionId": "q67",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1447
+ },
+ {
+ "questionId": "q67",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 10295
+ },
+ {
+ "questionId": "q67",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1228
+ },
+ {
+ "questionId": "q67",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 1748
+ },
+ {
+ "questionId": "q67",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "423.6",
+ "actual": "423.6",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1373
+ },
+ {
+ "questionId": "q68",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 3,
+ "latencyMs": 3836
+ },
+ {
+ "questionId": "q68",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1297
+ },
+ {
+ "questionId": "q68",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 3,
+ "latencyMs": 1927
+ },
+ {
+ "questionId": "q68",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1171
+ },
+ {
+ "questionId": "q68",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 3,
+ "latencyMs": 1551
+ },
+ {
+ "questionId": "q68",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1273
+ },
+ {
+ "questionId": "q68",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 3,
+ "latencyMs": 1387
+ },
+ {
+ "questionId": "q68",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1237
+ },
+ {
+ "questionId": "q68",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 3,
+ "latencyMs": 1934
+ },
+ {
+ "questionId": "q68",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1132
+ },
+ {
+ "questionId": "q69",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 2267
+ },
+ {
+ "questionId": "q69",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1772
+ },
+ {
+ "questionId": "q69",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 1315
+ },
+ {
+ "questionId": "q69",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1165
+ },
+ {
+ "questionId": "q69",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1097
+ },
+ {
+ "questionId": "q69",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1299
+ },
+ {
+ "questionId": "q69",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 1779
+ },
+ {
+ "questionId": "q69",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 3153
+ },
+ {
+ "questionId": "q69",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 1813
+ },
+ {
+ "questionId": "q69",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "784.03",
+ "actual": "784.03",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1867
+ },
+ {
+ "questionId": "q70",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 3,
+ "latencyMs": 1611
+ },
+ {
+ "questionId": "q70",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1173
+ },
+ {
+ "questionId": "q70",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 3,
+ "latencyMs": 1977
+ },
+ {
+ "questionId": "q70",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1108
+ },
+ {
+ "questionId": "q70",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 3,
+ "latencyMs": 1324
+ },
+ {
+ "questionId": "q70",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1225
+ },
+ {
+ "questionId": "q70",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 3,
+ "latencyMs": 1416
+ },
+ {
+ "questionId": "q70",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1200
+ },
+ {
+ "questionId": "q70",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 3,
+ "latencyMs": 1259
+ },
+ {
+ "questionId": "q70",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "shipped",
+ "actual": "shipped",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1433
+ },
+ {
+ "questionId": "q71",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 1729
+ },
+ {
+ "questionId": "q71",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1143
+ },
+ {
+ "questionId": "q71",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 1837
+ },
+ {
+ "questionId": "q71",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1147
+ },
+ {
+ "questionId": "q71",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1777
+ },
+ {
+ "questionId": "q71",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1295
+ },
+ {
+ "questionId": "q71",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 1081
+ },
+ {
+ "questionId": "q71",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1692
+ },
+ {
+ "questionId": "q71",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 1661
+ },
+ {
+ "questionId": "q71",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "645.88",
+ "actual": "645.88",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1475
+ },
+ {
+ "questionId": "q72",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 2,
+ "latencyMs": 2979
+ },
+ {
+ "questionId": "q72",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1187
+ },
+ {
+ "questionId": "q72",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 2,
+ "latencyMs": 1620
+ },
+ {
+ "questionId": "q72",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1532
+ },
+ {
+ "questionId": "q72",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 2,
+ "latencyMs": 1616
+ },
+ {
+ "questionId": "q72",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1435
+ },
+ {
+ "questionId": "q72",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 2,
+ "latencyMs": 1190
+ },
+ {
+ "questionId": "q72",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1414
+ },
+ {
+ "questionId": "q72",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 2,
+ "latencyMs": 2335
+ },
+ {
+ "questionId": "q72",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "processing",
+ "actual": "processing",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1308
+ },
+ {
+ "questionId": "q73",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 3359
+ },
+ {
+ "questionId": "q73",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1227
+ },
+ {
+ "questionId": "q73",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 1439
+ },
+ {
+ "questionId": "q73",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1179
+ },
+ {
+ "questionId": "q73",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1064
+ },
+ {
+ "questionId": "q73",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1144
+ },
+ {
+ "questionId": "q73",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 1873
+ },
+ {
+ "questionId": "q73",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1302
+ },
+ {
+ "questionId": "q73",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 1956
+ },
+ {
+ "questionId": "q73",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "371.91",
+ "actual": "371.91",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1281
+ },
+ {
+ "questionId": "q74",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 2,
+ "latencyMs": 1591
+ },
+ {
+ "questionId": "q74",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1279
+ },
+ {
+ "questionId": "q74",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 2,
+ "latencyMs": 3152
+ },
+ {
+ "questionId": "q74",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1061
+ },
+ {
+ "questionId": "q74",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 2,
+ "latencyMs": 1557
+ },
+ {
+ "questionId": "q74",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1313
+ },
+ {
+ "questionId": "q74",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 2,
+ "latencyMs": 1433
+ },
+ {
+ "questionId": "q74",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1812
+ },
+ {
+ "questionId": "q74",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 2,
+ "latencyMs": 1024
+ },
+ {
+ "questionId": "q74",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "pending",
+ "actual": "pending",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1243
+ },
+ {
+ "questionId": "q75",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 3,
+ "latencyMs": 1500
+ },
+ {
+ "questionId": "q75",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 6,
+ "latencyMs": 1275
+ },
+ {
+ "questionId": "q75",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 3,
+ "latencyMs": 1841
+ },
+ {
+ "questionId": "q75",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 6,
+ "latencyMs": 1080
+ },
+ {
+ "questionId": "q75",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 3,
+ "latencyMs": 1209
+ },
+ {
+ "questionId": "q75",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 6,
+ "latencyMs": 1308
+ },
+ {
+ "questionId": "q75",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 3,
+ "latencyMs": 1556
+ },
+ {
+ "questionId": "q75",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 6,
+ "latencyMs": 1240
+ },
+ {
+ "questionId": "q75",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 3,
+ "latencyMs": 1254
+ },
+ {
+ "questionId": "q75",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "1066",
+ "actual": "1066",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 6,
+ "latencyMs": 1305
+ },
+ {
+ "questionId": "q76",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 3,
+ "latencyMs": 2606
+ },
+ {
+ "questionId": "q76",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1422
+ },
+ {
+ "questionId": "q76",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 3,
+ "latencyMs": 2688
+ },
+ {
+ "questionId": "q76",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1041
+ },
+ {
+ "questionId": "q76",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 3,
+ "latencyMs": 3070
+ },
+ {
+ "questionId": "q76",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1167
+ },
+ {
+ "questionId": "q76",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 3,
+ "latencyMs": 1702
+ },
+ {
+ "questionId": "q76",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1182
+ },
+ {
+ "questionId": "q76",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 3,
+ "latencyMs": 1740
+ },
+ {
+ "questionId": "q76",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "cancelled",
+ "actual": "cancelled",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1404
+ },
+ {
+ "questionId": "q77",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 5,
+ "latencyMs": 1596
+ },
+ {
+ "questionId": "q77",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 8,
+ "latencyMs": 2314
+ },
+ {
+ "questionId": "q77",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 5,
+ "latencyMs": 1114
+ },
+ {
+ "questionId": "q77",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 8,
+ "latencyMs": 1289
+ },
+ {
+ "questionId": "q77",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 5,
+ "latencyMs": 2428
+ },
+ {
+ "questionId": "q77",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 8,
+ "latencyMs": 1325
+ },
+ {
+ "questionId": "q77",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 5,
+ "latencyMs": 1343
+ },
+ {
+ "questionId": "q77",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 8,
+ "latencyMs": 1783
+ },
+ {
+ "questionId": "q77",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 5,
+ "latencyMs": 918
+ },
+ {
+ "questionId": "q77",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "1697.4",
+ "actual": "1697.4",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 8,
+ "latencyMs": 1308
+ },
+ {
+ "questionId": "q78",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 9739,
+ "outputTokens": 3,
+ "latencyMs": 1396
+ },
+ {
+ "questionId": "q78",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 11906,
+ "outputTokens": 4,
+ "latencyMs": 1225
+ },
+ {
+ "questionId": "q78",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 6013,
+ "outputTokens": 3,
+ "latencyMs": 2294
+ },
+ {
+ "questionId": "q78",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 6992,
+ "outputTokens": 4,
+ "latencyMs": 1418
+ },
+ {
+ "questionId": "q78",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 6781,
+ "outputTokens": 3,
+ "latencyMs": 1613
+ },
+ {
+ "questionId": "q78",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 8413,
+ "outputTokens": 4,
+ "latencyMs": 1374
+ },
+ {
+ "questionId": "q78",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 9158,
+ "outputTokens": 3,
+ "latencyMs": 1341
+ },
+ {
+ "questionId": "q78",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 9288,
+ "outputTokens": 4,
+ "latencyMs": 1223
+ },
+ {
+ "questionId": "q78",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 7373,
+ "outputTokens": 3,
+ "latencyMs": 2230
+ },
+ {
+ "questionId": "q78",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "delivered",
+ "actual": "delivered",
+ "correct": true,
+ "inputTokens": 8384,
+ "outputTokens": 4,
+ "latencyMs": 1425
+ },
+ {
+ "questionId": "q79",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 1377
+ },
+ {
+ "questionId": "q79",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 9,
+ "latencyMs": 1550
+ },
+ {
+ "questionId": "q79",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 1394
+ },
+ {
+ "questionId": "q79",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 9,
+ "latencyMs": 1202
+ },
+ {
+ "questionId": "q79",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1435
+ },
+ {
+ "questionId": "q79",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 9,
+ "latencyMs": 1277
+ },
+ {
+ "questionId": "q79",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 1564
+ },
+ {
+ "questionId": "q79",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 9,
+ "latencyMs": 1200
+ },
+ {
+ "questionId": "q79",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 1596
+ },
+ {
+ "questionId": "q79",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Valerie Braun",
+ "actual": "Valerie Braun",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 9,
+ "latencyMs": 1151
+ },
+ {
+ "questionId": "q80",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 5,
+ "latencyMs": 1458
+ },
+ {
+ "questionId": "q80",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 9,
+ "latencyMs": 1283
+ },
+ {
+ "questionId": "q80",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 5,
+ "latencyMs": 4702
+ },
+ {
+ "questionId": "q80",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 9,
+ "latencyMs": 1360
+ },
+ {
+ "questionId": "q80",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 5,
+ "latencyMs": 6167
+ },
+ {
+ "questionId": "q80",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 9,
+ "latencyMs": 1449
+ },
+ {
+ "questionId": "q80",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 5,
+ "latencyMs": 6096
+ },
+ {
+ "questionId": "q80",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 9,
+ "latencyMs": 1194
+ },
+ {
+ "questionId": "q80",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 5,
+ "latencyMs": 7357
+ },
+ {
+ "questionId": "q80",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Anita Kozey",
+ "actual": "Anita Kozey",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 9,
+ "latencyMs": 1213
+ },
+ {
+ "questionId": "q81",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 6,
+ "latencyMs": 2539
+ },
+ {
+ "questionId": "q81",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 10,
+ "latencyMs": 1532
+ },
+ {
+ "questionId": "q81",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 6,
+ "latencyMs": 2960
+ },
+ {
+ "questionId": "q81",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 10,
+ "latencyMs": 1547
+ },
+ {
+ "questionId": "q81",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 6,
+ "latencyMs": 1358
+ },
+ {
+ "questionId": "q81",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 10,
+ "latencyMs": 1424
+ },
+ {
+ "questionId": "q81",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 6,
+ "latencyMs": 958
+ },
+ {
+ "questionId": "q81",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 10,
+ "latencyMs": 1381
+ },
+ {
+ "questionId": "q81",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 6,
+ "latencyMs": 1372
+ },
+ {
+ "questionId": "q81",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Elmer Kub PhD",
+ "actual": "Elmer Kub PhD",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 10,
+ "latencyMs": 1715
+ },
+ {
+ "questionId": "q82",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 5,
+ "latencyMs": 1972
+ },
+ {
+ "questionId": "q82",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 10,
+ "latencyMs": 1315
+ },
+ {
+ "questionId": "q82",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 5,
+ "latencyMs": 1634
+ },
+ {
+ "questionId": "q82",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 10,
+ "latencyMs": 1264
+ },
+ {
+ "questionId": "q82",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 5,
+ "latencyMs": 1153
+ },
+ {
+ "questionId": "q82",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 10,
+ "latencyMs": 1252
+ },
+ {
+ "questionId": "q82",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 5,
+ "latencyMs": 1697
+ },
+ {
+ "questionId": "q82",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 10,
+ "latencyMs": 1198
+ },
+ {
+ "questionId": "q82",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 5,
+ "latencyMs": 1854
+ },
+ {
+ "questionId": "q82",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Maxine Zemlak",
+ "actual": "Maxine Zemlak",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 10,
+ "latencyMs": 1752
+ },
+ {
+ "questionId": "q83",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 5,
+ "latencyMs": 2076
+ },
+ {
+ "questionId": "q83",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1398
+ },
+ {
+ "questionId": "q83",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 5,
+ "latencyMs": 2263
+ },
+ {
+ "questionId": "q83",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 3101
+ },
+ {
+ "questionId": "q83",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 5,
+ "latencyMs": 1453
+ },
+ {
+ "questionId": "q83",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1265
+ },
+ {
+ "questionId": "q83",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 5,
+ "latencyMs": 8807
+ },
+ {
+ "questionId": "q83",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1097
+ },
+ {
+ "questionId": "q83",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 5,
+ "latencyMs": 1667
+ },
+ {
+ "questionId": "q83",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Emanuel Littel",
+ "actual": "Emanuel Littel",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1198
+ },
+ {
+ "questionId": "q84",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 3,
+ "latencyMs": 2292
+ },
+ {
+ "questionId": "q84",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1202
+ },
+ {
+ "questionId": "q84",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 3,
+ "latencyMs": 1801
+ },
+ {
+ "questionId": "q84",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1287
+ },
+ {
+ "questionId": "q84",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 3,
+ "latencyMs": 1340
+ },
+ {
+ "questionId": "q84",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1163
+ },
+ {
+ "questionId": "q84",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 3,
+ "latencyMs": 2685
+ },
+ {
+ "questionId": "q84",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1397
+ },
+ {
+ "questionId": "q84",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 3,
+ "latencyMs": 1289
+ },
+ {
+ "questionId": "q84",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Andrew Kling",
+ "actual": "Andrew Kling",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1155
+ },
+ {
+ "questionId": "q85",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 6,
+ "latencyMs": 1601
+ },
+ {
+ "questionId": "q85",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 9,
+ "latencyMs": 1340
+ },
+ {
+ "questionId": "q85",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 6,
+ "latencyMs": 3525
+ },
+ {
+ "questionId": "q85",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 9,
+ "latencyMs": 1710
+ },
+ {
+ "questionId": "q85",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 6,
+ "latencyMs": 2333
+ },
+ {
+ "questionId": "q85",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 9,
+ "latencyMs": 1168
+ },
+ {
+ "questionId": "q85",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 6,
+ "latencyMs": 1781
+ },
+ {
+ "questionId": "q85",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 9,
+ "latencyMs": 1552
+ },
+ {
+ "questionId": "q85",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 6,
+ "latencyMs": 1584
+ },
+ {
+ "questionId": "q85",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Morris O'Hara",
+ "actual": "Morris O'Hara",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 9,
+ "latencyMs": 1548
+ },
+ {
+ "questionId": "q86",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 6,
+ "latencyMs": 7230
+ },
+ {
+ "questionId": "q86",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 9,
+ "latencyMs": 1933
+ },
+ {
+ "questionId": "q86",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 6,
+ "latencyMs": 1067
+ },
+ {
+ "questionId": "q86",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 9,
+ "latencyMs": 1288
+ },
+ {
+ "questionId": "q86",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 6,
+ "latencyMs": 3954
+ },
+ {
+ "questionId": "q86",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 9,
+ "latencyMs": 1314
+ },
+ {
+ "questionId": "q86",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 6,
+ "latencyMs": 1334
+ },
+ {
+ "questionId": "q86",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 9,
+ "latencyMs": 2441
+ },
+ {
+ "questionId": "q86",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 6,
+ "latencyMs": 1650
+ },
+ {
+ "questionId": "q86",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Elijah Franecki",
+ "actual": "Elijah Franecki",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 9,
+ "latencyMs": 1495
+ },
+ {
+ "questionId": "q87",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 5,
+ "latencyMs": 1262
+ },
+ {
+ "questionId": "q87",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 7,
+ "latencyMs": 1367
+ },
+ {
+ "questionId": "q87",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 5,
+ "latencyMs": 1385
+ },
+ {
+ "questionId": "q87",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 7,
+ "latencyMs": 1313
+ },
+ {
+ "questionId": "q87",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 5,
+ "latencyMs": 1141
+ },
+ {
+ "questionId": "q87",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 7,
+ "latencyMs": 1300
+ },
+ {
+ "questionId": "q87",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 5,
+ "latencyMs": 3347
+ },
+ {
+ "questionId": "q87",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 7,
+ "latencyMs": 1457
+ },
+ {
+ "questionId": "q87",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 5,
+ "latencyMs": 1276
+ },
+ {
+ "questionId": "q87",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Malcolm Erdman",
+ "actual": "Malcolm Erdman",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 7,
+ "latencyMs": 1211
+ },
+ {
+ "questionId": "q88",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 5,
+ "latencyMs": 1635
+ },
+ {
+ "questionId": "q88",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 9,
+ "latencyMs": 1582
+ },
+ {
+ "questionId": "q88",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 5,
+ "latencyMs": 1695
+ },
+ {
+ "questionId": "q88",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 9,
+ "latencyMs": 1318
+ },
+ {
+ "questionId": "q88",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 5,
+ "latencyMs": 936
+ },
+ {
+ "questionId": "q88",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 9,
+ "latencyMs": 1204
+ },
+ {
+ "questionId": "q88",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 5,
+ "latencyMs": 996
+ },
+ {
+ "questionId": "q88",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 9,
+ "latencyMs": 1261
+ },
+ {
+ "questionId": "q88",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 5,
+ "latencyMs": 2276
+ },
+ {
+ "questionId": "q88",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Fannie Skiles",
+ "actual": "Fannie Skiles",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 9,
+ "latencyMs": 1380
+ },
+ {
+ "questionId": "q89",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 6,
+ "latencyMs": 1451
+ },
+ {
+ "questionId": "q89",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 10,
+ "latencyMs": 1977
+ },
+ {
+ "questionId": "q89",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 6,
+ "latencyMs": 1376
+ },
+ {
+ "questionId": "q89",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 10,
+ "latencyMs": 1250
+ },
+ {
+ "questionId": "q89",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 6,
+ "latencyMs": 1273
+ },
+ {
+ "questionId": "q89",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 10,
+ "latencyMs": 1359
+ },
+ {
+ "questionId": "q89",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 6,
+ "latencyMs": 1791
+ },
+ {
+ "questionId": "q89",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 10,
+ "latencyMs": 1273
+ },
+ {
+ "questionId": "q89",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 6,
+ "latencyMs": 2832
+ },
+ {
+ "questionId": "q89",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Sonja Emmerich",
+ "actual": "Sonja Emmerich",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 10,
+ "latencyMs": 1172
+ },
+ {
+ "questionId": "q90",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 7,
+ "latencyMs": 1491
+ },
+ {
+ "questionId": "q90",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 10,
+ "latencyMs": 1414
+ },
+ {
+ "questionId": "q90",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 7,
+ "latencyMs": 1396
+ },
+ {
+ "questionId": "q90",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 10,
+ "latencyMs": 1514
+ },
+ {
+ "questionId": "q90",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 7,
+ "latencyMs": 1573
+ },
+ {
+ "questionId": "q90",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 10,
+ "latencyMs": 1284
+ },
+ {
+ "questionId": "q90",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 7,
+ "latencyMs": 5400
+ },
+ {
+ "questionId": "q90",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 10,
+ "latencyMs": 1486
+ },
+ {
+ "questionId": "q90",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 7,
+ "latencyMs": 1420
+ },
+ {
+ "questionId": "q90",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Frank Emmerich DVM",
+ "actual": "Frank Emmerich DVM",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 10,
+ "latencyMs": 1410
+ },
+ {
+ "questionId": "q91",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 1248
+ },
+ {
+ "questionId": "q91",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 5,
+ "latencyMs": 1177
+ },
+ {
+ "questionId": "q91",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 1601
+ },
+ {
+ "questionId": "q91",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 5,
+ "latencyMs": 1822
+ },
+ {
+ "questionId": "q91",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1103
+ },
+ {
+ "questionId": "q91",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 5,
+ "latencyMs": 1247
+ },
+ {
+ "questionId": "q91",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 1184
+ },
+ {
+ "questionId": "q91",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 5,
+ "latencyMs": 1137
+ },
+ {
+ "questionId": "q91",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 949
+ },
+ {
+ "questionId": "q91",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Ronald Collins",
+ "actual": "Ronald Collins",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 5,
+ "latencyMs": 1143
+ },
+ {
+ "questionId": "q92",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 4,
+ "latencyMs": 1021
+ },
+ {
+ "questionId": "q92",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 8,
+ "latencyMs": 1301
+ },
+ {
+ "questionId": "q92",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 4,
+ "latencyMs": 1254
+ },
+ {
+ "questionId": "q92",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 8,
+ "latencyMs": 1375
+ },
+ {
+ "questionId": "q92",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 4,
+ "latencyMs": 1316
+ },
+ {
+ "questionId": "q92",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 8,
+ "latencyMs": 2681
+ },
+ {
+ "questionId": "q92",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 4,
+ "latencyMs": 2427
+ },
+ {
+ "questionId": "q92",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 8,
+ "latencyMs": 1526
+ },
+ {
+ "questionId": "q92",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 4,
+ "latencyMs": 1252
+ },
+ {
+ "questionId": "q92",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Jeannie Klein",
+ "actual": "Jeannie Klein",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 8,
+ "latencyMs": 1324
+ },
+ {
+ "questionId": "q93",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 9740,
+ "outputTokens": 5,
+ "latencyMs": 1606
+ },
+ {
+ "questionId": "q93",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 11907,
+ "outputTokens": 8,
+ "latencyMs": 1223
+ },
+ {
+ "questionId": "q93",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 6014,
+ "outputTokens": 5,
+ "latencyMs": 1965
+ },
+ {
+ "questionId": "q93",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 6993,
+ "outputTokens": 8,
+ "latencyMs": 1300
+ },
+ {
+ "questionId": "q93",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 6782,
+ "outputTokens": 5,
+ "latencyMs": 1110
+ },
+ {
+ "questionId": "q93",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 8414,
+ "outputTokens": 8,
+ "latencyMs": 1819
+ },
+ {
+ "questionId": "q93",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 9159,
+ "outputTokens": 5,
+ "latencyMs": 1010
+ },
+ {
+ "questionId": "q93",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 9289,
+ "outputTokens": 8,
+ "latencyMs": 1224
+ },
+ {
+ "questionId": "q93",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 7374,
+ "outputTokens": 5,
+ "latencyMs": 1430
+ },
+ {
+ "questionId": "q93",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "Joshua Watsica",
+ "actual": "Joshua Watsica",
+ "correct": true,
+ "inputTokens": 8385,
+ "outputTokens": 8,
+ "latencyMs": 1158
+ },
+ {
+ "questionId": "q94",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9736,
+ "outputTokens": 2,
+ "latencyMs": 1352
+ },
+ {
+ "questionId": "q94",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 11902,
+ "outputTokens": 5,
+ "latencyMs": 1498
+ },
+ {
+ "questionId": "q94",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 6010,
+ "outputTokens": 2,
+ "latencyMs": 1249
+ },
+ {
+ "questionId": "q94",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 6988,
+ "outputTokens": 5,
+ "latencyMs": 1080
+ },
+ {
+ "questionId": "q94",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 6778,
+ "outputTokens": 2,
+ "latencyMs": 1760
+ },
+ {
+ "questionId": "q94",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 8409,
+ "outputTokens": 5,
+ "latencyMs": 1156
+ },
+ {
+ "questionId": "q94",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9155,
+ "outputTokens": 2,
+ "latencyMs": 9923
+ },
+ {
+ "questionId": "q94",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 9284,
+ "outputTokens": 5,
+ "latencyMs": 1138
+ },
+ {
+ "questionId": "q94",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 7370,
+ "outputTokens": 2,
+ "latencyMs": 1070
+ },
+ {
+ "questionId": "q94",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 8380,
+ "outputTokens": 5,
+ "latencyMs": 1114
+ },
+ {
+ "questionId": "q95",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9736,
+ "outputTokens": 2,
+ "latencyMs": 830
+ },
+ {
+ "questionId": "q95",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 11902,
+ "outputTokens": 5,
+ "latencyMs": 1085
+ },
+ {
+ "questionId": "q95",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 6010,
+ "outputTokens": 2,
+ "latencyMs": 2362
+ },
+ {
+ "questionId": "q95",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 6988,
+ "outputTokens": 5,
+ "latencyMs": 1198
+ },
+ {
+ "questionId": "q95",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 6778,
+ "outputTokens": 2,
+ "latencyMs": 1630
+ },
+ {
+ "questionId": "q95",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 8409,
+ "outputTokens": 5,
+ "latencyMs": 1219
+ },
+ {
+ "questionId": "q95",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9155,
+ "outputTokens": 2,
+ "latencyMs": 2666
+ },
+ {
+ "questionId": "q95",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 9284,
+ "outputTokens": 5,
+ "latencyMs": 1044
+ },
+ {
+ "questionId": "q95",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 7370,
+ "outputTokens": 2,
+ "latencyMs": 2187
+ },
+ {
+ "questionId": "q95",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 8380,
+ "outputTokens": 5,
+ "latencyMs": 1313
+ },
+ {
+ "questionId": "q96",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 9737,
+ "outputTokens": 2,
+ "latencyMs": 1087
+ },
+ {
+ "questionId": "q96",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 11902,
+ "outputTokens": 5,
+ "latencyMs": 1292
+ },
+ {
+ "questionId": "q96",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 6011,
+ "outputTokens": 2,
+ "latencyMs": 1979
+ },
+ {
+ "questionId": "q96",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 6988,
+ "outputTokens": 5,
+ "latencyMs": 1095
+ },
+ {
+ "questionId": "q96",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 6779,
+ "outputTokens": 2,
+ "latencyMs": 1385
+ },
+ {
+ "questionId": "q96",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 8409,
+ "outputTokens": 5,
+ "latencyMs": 1507
+ },
+ {
+ "questionId": "q96",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9156,
+ "outputTokens": 2,
+ "latencyMs": 1579
+ },
+ {
+ "questionId": "q96",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 9284,
+ "outputTokens": 5,
+ "latencyMs": 1365
+ },
+ {
+ "questionId": "q96",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 7371,
+ "outputTokens": 2,
+ "latencyMs": 1661
+ },
+ {
+ "questionId": "q96",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "7",
+ "correct": false,
+ "inputTokens": 8380,
+ "outputTokens": 5,
+ "latencyMs": 1423
+ },
+ {
+ "questionId": "q97",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 9737,
+ "outputTokens": 2,
+ "latencyMs": 1815
+ },
+ {
+ "questionId": "q97",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 11902,
+ "outputTokens": 5,
+ "latencyMs": 1345
+ },
+ {
+ "questionId": "q97",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 6011,
+ "outputTokens": 2,
+ "latencyMs": 2193
+ },
+ {
+ "questionId": "q97",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 6988,
+ "outputTokens": 5,
+ "latencyMs": 1417
+ },
+ {
+ "questionId": "q97",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 6779,
+ "outputTokens": 2,
+ "latencyMs": 1721
+ },
+ {
+ "questionId": "q97",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 8409,
+ "outputTokens": 5,
+ "latencyMs": 1114
+ },
+ {
+ "questionId": "q97",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 9156,
+ "outputTokens": 2,
+ "latencyMs": 2208
+ },
+ {
+ "questionId": "q97",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9284,
+ "outputTokens": 5,
+ "latencyMs": 1895
+ },
+ {
+ "questionId": "q97",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 7371,
+ "outputTokens": 2,
+ "latencyMs": 1287
+ },
+ {
+ "questionId": "q97",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 8380,
+ "outputTokens": 5,
+ "latencyMs": 1281
+ },
+ {
+ "questionId": "q98",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9737,
+ "outputTokens": 2,
+ "latencyMs": 1387
+ },
+ {
+ "questionId": "q98",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 11902,
+ "outputTokens": 5,
+ "latencyMs": 1243
+ },
+ {
+ "questionId": "q98",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 6011,
+ "outputTokens": 2,
+ "latencyMs": 1284
+ },
+ {
+ "questionId": "q98",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 6988,
+ "outputTokens": 5,
+ "latencyMs": 1161
+ },
+ {
+ "questionId": "q98",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 6779,
+ "outputTokens": 2,
+ "latencyMs": 10406
+ },
+ {
+ "questionId": "q98",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 8409,
+ "outputTokens": 5,
+ "latencyMs": 1335
+ },
+ {
+ "questionId": "q98",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9156,
+ "outputTokens": 2,
+ "latencyMs": 1517
+ },
+ {
+ "questionId": "q98",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 9284,
+ "outputTokens": 5,
+ "latencyMs": 1702
+ },
+ {
+ "questionId": "q98",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "10",
+ "actual": "10",
+ "correct": true,
+ "inputTokens": 7371,
+ "outputTokens": 2,
+ "latencyMs": 1676
+ },
+ {
+ "questionId": "q98",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "10",
+ "actual": "8",
+ "correct": false,
+ "inputTokens": 8380,
+ "outputTokens": 5,
+ "latencyMs": 1218
+ },
+ {
+ "questionId": "q99",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "42342.25",
+ "actual": "$50,000.00",
+ "correct": false,
+ "inputTokens": 9737,
+ "outputTokens": 7,
+ "latencyMs": 1407
+ },
+ {
+ "questionId": "q99",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "42342.25",
+ "actual": "50,847.47",
+ "correct": false,
+ "inputTokens": 11902,
+ "outputTokens": 9,
+ "latencyMs": 1443
+ },
+ {
+ "questionId": "q99",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "42342.25",
+ "actual": "Total revenue across all orders is 42,195.36.",
+ "correct": false,
+ "inputTokens": 6011,
+ "outputTokens": 14,
+ "latencyMs": 1150
+ },
+ {
+ "questionId": "q99",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "42342.25",
+ "actual": "41,847.47",
+ "correct": false,
+ "inputTokens": 6988,
+ "outputTokens": 9,
+ "latencyMs": 1774
+ },
+ {
+ "questionId": "q99",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "42342.25",
+ "actual": "$32,186.73",
+ "correct": false,
+ "inputTokens": 6779,
+ "outputTokens": 7,
+ "latencyMs": 2654
+ },
+ {
+ "questionId": "q99",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "42342.25",
+ "actual": "48,847.47",
+ "correct": false,
+ "inputTokens": 8409,
+ "outputTokens": 9,
+ "latencyMs": 1386
+ },
+ {
+ "questionId": "q99",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "42342.25",
+ "actual": "$34,186.73",
+ "correct": false,
+ "inputTokens": 9156,
+ "outputTokens": 7,
+ "latencyMs": 1506
+ },
+ {
+ "questionId": "q99",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "42342.25",
+ "actual": "48,847.47",
+ "correct": false,
+ "inputTokens": 9284,
+ "outputTokens": 9,
+ "latencyMs": 1509
+ },
+ {
+ "questionId": "q99",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "42342.25",
+ "actual": "Total revenue across all orders is 48780.73.",
+ "correct": false,
+ "inputTokens": 7371,
+ "outputTokens": 13,
+ "latencyMs": 1700
+ },
+ {
+ "questionId": "q99",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "42342.25",
+ "actual": "47,847.47",
+ "correct": false,
+ "inputTokens": 8380,
+ "outputTokens": 9,
+ "latencyMs": 1230
+ },
+ {
+ "questionId": "q100",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "44",
+ "actual": "36",
+ "correct": false,
+ "inputTokens": 9739,
+ "outputTokens": 2,
+ "latencyMs": 1725
+ },
+ {
+ "questionId": "q100",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "44",
+ "actual": "48",
+ "correct": false,
+ "inputTokens": 11904,
+ "outputTokens": 5,
+ "latencyMs": 1377
+ },
+ {
+ "questionId": "q100",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "44",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 6013,
+ "outputTokens": 2,
+ "latencyMs": 1399
+ },
+ {
+ "questionId": "q100",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "44",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 6990,
+ "outputTokens": 5,
+ "latencyMs": 1094
+ },
+ {
+ "questionId": "q100",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "44",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 6781,
+ "outputTokens": 2,
+ "latencyMs": 1617
+ },
+ {
+ "questionId": "q100",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "44",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 8411,
+ "outputTokens": 5,
+ "latencyMs": 1344
+ },
+ {
+ "questionId": "q100",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "44",
+ "actual": "36",
+ "correct": false,
+ "inputTokens": 9158,
+ "outputTokens": 2,
+ "latencyMs": 2396
+ },
+ {
+ "questionId": "q100",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "44",
+ "actual": "48",
+ "correct": false,
+ "inputTokens": 9286,
+ "outputTokens": 5,
+ "latencyMs": 1145
+ },
+ {
+ "questionId": "q100",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "44",
+ "actual": "36",
+ "correct": false,
+ "inputTokens": 7373,
+ "outputTokens": 2,
+ "latencyMs": 951
+ },
+ {
+ "questionId": "q100",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "44",
+ "actual": "45",
+ "correct": false,
+ "inputTokens": 8382,
+ "outputTokens": 5,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q101",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "39",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 9739,
+ "outputTokens": 2,
+ "latencyMs": 866
+ },
+ {
+ "questionId": "q101",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "39",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 11904,
+ "outputTokens": 5,
+ "latencyMs": 1964
+ },
+ {
+ "questionId": "q101",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "39",
+ "actual": "30",
+ "correct": false,
+ "inputTokens": 6013,
+ "outputTokens": 2,
+ "latencyMs": 1994
+ },
+ {
+ "questionId": "q101",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "39",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 6990,
+ "outputTokens": 5,
+ "latencyMs": 1277
+ },
+ {
+ "questionId": "q101",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "39",
+ "actual": "32",
+ "correct": false,
+ "inputTokens": 6781,
+ "outputTokens": 2,
+ "latencyMs": 1884
+ },
+ {
+ "questionId": "q101",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "39",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 8411,
+ "outputTokens": 5,
+ "latencyMs": 1282
+ },
+ {
+ "questionId": "q101",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "39",
+ "actual": "32",
+ "correct": false,
+ "inputTokens": 9158,
+ "outputTokens": 2,
+ "latencyMs": 1761
+ },
+ {
+ "questionId": "q101",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "39",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 9286,
+ "outputTokens": 5,
+ "latencyMs": 1250
+ },
+ {
+ "questionId": "q101",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "39",
+ "actual": "32",
+ "correct": false,
+ "inputTokens": 7373,
+ "outputTokens": 2,
+ "latencyMs": 1316
+ },
+ {
+ "questionId": "q101",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "39",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 8382,
+ "outputTokens": 5,
+ "latencyMs": 1373
+ },
+ {
+ "questionId": "q102",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "32",
+ "actual": "27",
+ "correct": false,
+ "inputTokens": 9739,
+ "outputTokens": 2,
+ "latencyMs": 1389
+ },
+ {
+ "questionId": "q102",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "32",
+ "actual": "28",
+ "correct": false,
+ "inputTokens": 11904,
+ "outputTokens": 5,
+ "latencyMs": 1215
+ },
+ {
+ "questionId": "q102",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "32",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 6013,
+ "outputTokens": 2,
+ "latencyMs": 1034
+ },
+ {
+ "questionId": "q102",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "32",
+ "actual": "26",
+ "correct": false,
+ "inputTokens": 6990,
+ "outputTokens": 5,
+ "latencyMs": 1063
+ },
+ {
+ "questionId": "q102",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "32",
+ "actual": "25",
+ "correct": false,
+ "inputTokens": 6781,
+ "outputTokens": 2,
+ "latencyMs": 7312
+ },
+ {
+ "questionId": "q102",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "32",
+ "actual": "28",
+ "correct": false,
+ "inputTokens": 8411,
+ "outputTokens": 5,
+ "latencyMs": 1387
+ },
+ {
+ "questionId": "q102",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "32",
+ "actual": "27",
+ "correct": false,
+ "inputTokens": 9158,
+ "outputTokens": 2,
+ "latencyMs": 1488
+ },
+ {
+ "questionId": "q102",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "32",
+ "actual": "28",
+ "correct": false,
+ "inputTokens": 9286,
+ "outputTokens": 5,
+ "latencyMs": 1268
+ },
+ {
+ "questionId": "q102",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "32",
+ "actual": "27",
+ "correct": false,
+ "inputTokens": 7373,
+ "outputTokens": 2,
+ "latencyMs": 1274
+ },
+ {
+ "questionId": "q102",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "32",
+ "actual": "26",
+ "correct": false,
+ "inputTokens": 8382,
+ "outputTokens": 5,
+ "latencyMs": 1354
+ },
+ {
+ "questionId": "q103",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 1330
+ },
+ {
+ "questionId": "q103",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1437
+ },
+ {
+ "questionId": "q103",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 1341
+ },
+ {
+ "questionId": "q103",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1231
+ },
+ {
+ "questionId": "q103",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 2515
+ },
+ {
+ "questionId": "q103",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1162
+ },
+ {
+ "questionId": "q103",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 868
+ },
+ {
+ "questionId": "q103",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1149
+ },
+ {
+ "questionId": "q103",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 1183
+ },
+ {
+ "questionId": "q103",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "6975",
+ "actual": "6975",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1119
+ },
+ {
+ "questionId": "q104",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 1273
+ },
+ {
+ "questionId": "q104",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1371
+ },
+ {
+ "questionId": "q104",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 2052
+ },
+ {
+ "questionId": "q104",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 997
+ },
+ {
+ "questionId": "q104",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 1152
+ },
+ {
+ "questionId": "q104",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 1188
+ },
+ {
+ "questionId": "q104",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 1259
+ },
+ {
+ "questionId": "q104",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1239
+ },
+ {
+ "questionId": "q104",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 1096
+ },
+ {
+ "questionId": "q104",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "6686.23",
+ "actual": "6686.23",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 1247
+ },
+ {
+ "questionId": "q105",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 1354
+ },
+ {
+ "questionId": "q105",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1083
+ },
+ {
+ "questionId": "q105",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 869
+ },
+ {
+ "questionId": "q105",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1051
+ },
+ {
+ "questionId": "q105",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 1528
+ },
+ {
+ "questionId": "q105",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1126
+ },
+ {
+ "questionId": "q105",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 1136
+ },
+ {
+ "questionId": "q105",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1121
+ },
+ {
+ "questionId": "q105",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 1217
+ },
+ {
+ "questionId": "q105",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "7500",
+ "actual": "7500",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1099
+ },
+ {
+ "questionId": "q106",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 1416
+ },
+ {
+ "questionId": "q106",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1526
+ },
+ {
+ "questionId": "q106",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 1350
+ },
+ {
+ "questionId": "q106",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 1330
+ },
+ {
+ "questionId": "q106",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 2337
+ },
+ {
+ "questionId": "q106",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 1171
+ },
+ {
+ "questionId": "q106",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 3128
+ },
+ {
+ "questionId": "q106",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1151
+ },
+ {
+ "questionId": "q106",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 1988
+ },
+ {
+ "questionId": "q106",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "14297.05",
+ "actual": "14297.05",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 1166
+ },
+ {
+ "questionId": "q107",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 2217
+ },
+ {
+ "questionId": "q107",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1114
+ },
+ {
+ "questionId": "q107",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 1360
+ },
+ {
+ "questionId": "q107",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1079
+ },
+ {
+ "questionId": "q107",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 1951
+ },
+ {
+ "questionId": "q107",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1173
+ },
+ {
+ "questionId": "q107",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 1076
+ },
+ {
+ "questionId": "q107",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1098
+ },
+ {
+ "questionId": "q107",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 1101
+ },
+ {
+ "questionId": "q107",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "6692",
+ "actual": "6692",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1254
+ },
+ {
+ "questionId": "q108",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 2041
+ },
+ {
+ "questionId": "q108",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1405
+ },
+ {
+ "questionId": "q108",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 1170
+ },
+ {
+ "questionId": "q108",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 1161
+ },
+ {
+ "questionId": "q108",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 1326
+ },
+ {
+ "questionId": "q108",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 1259
+ },
+ {
+ "questionId": "q108",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 3006
+ },
+ {
+ "questionId": "q108",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1461
+ },
+ {
+ "questionId": "q108",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 3824
+ },
+ {
+ "questionId": "q108",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "9302.76",
+ "actual": "9302.76",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 1391
+ },
+ {
+ "questionId": "q109",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 1091
+ },
+ {
+ "questionId": "q109",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1188
+ },
+ {
+ "questionId": "q109",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 1450
+ },
+ {
+ "questionId": "q109",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1614
+ },
+ {
+ "questionId": "q109",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 1642
+ },
+ {
+ "questionId": "q109",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q109",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 1201
+ },
+ {
+ "questionId": "q109",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1261
+ },
+ {
+ "questionId": "q109",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 856
+ },
+ {
+ "questionId": "q109",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "3285",
+ "actual": "3285",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 980
+ },
+ {
+ "questionId": "q110",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 3090
+ },
+ {
+ "questionId": "q110",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1123
+ },
+ {
+ "questionId": "q110",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 2911
+ },
+ {
+ "questionId": "q110",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 979
+ },
+ {
+ "questionId": "q110",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 1118
+ },
+ {
+ "questionId": "q110",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 943
+ },
+ {
+ "questionId": "q110",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 2639
+ },
+ {
+ "questionId": "q110",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1187
+ },
+ {
+ "questionId": "q110",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 2402
+ },
+ {
+ "questionId": "q110",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "3826.93",
+ "actual": "3826.93",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 1723
+ },
+ {
+ "questionId": "q111",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 2401
+ },
+ {
+ "questionId": "q111",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1117
+ },
+ {
+ "questionId": "q111",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 1568
+ },
+ {
+ "questionId": "q111",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1132
+ },
+ {
+ "questionId": "q111",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 1478
+ },
+ {
+ "questionId": "q111",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1831
+ },
+ {
+ "questionId": "q111",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 1631
+ },
+ {
+ "questionId": "q111",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1371
+ },
+ {
+ "questionId": "q111",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 1209
+ },
+ {
+ "questionId": "q111",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "6191",
+ "actual": "6191",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1411
+ },
+ {
+ "questionId": "q112",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 1773
+ },
+ {
+ "questionId": "q112",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1090
+ },
+ {
+ "questionId": "q112",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 1354
+ },
+ {
+ "questionId": "q112",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 1095
+ },
+ {
+ "questionId": "q112",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 1135
+ },
+ {
+ "questionId": "q112",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 976
+ },
+ {
+ "questionId": "q112",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q112",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1287
+ },
+ {
+ "questionId": "q112",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 1288
+ },
+ {
+ "questionId": "q112",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "1854.66",
+ "actual": "1854.66",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 1157
+ },
+ {
+ "questionId": "q113",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 1328
+ },
+ {
+ "questionId": "q113",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1068
+ },
+ {
+ "questionId": "q113",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 1020
+ },
+ {
+ "questionId": "q113",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1069
+ },
+ {
+ "questionId": "q113",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 968
+ },
+ {
+ "questionId": "q113",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1436
+ },
+ {
+ "questionId": "q113",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 1171
+ },
+ {
+ "questionId": "q113",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1273
+ },
+ {
+ "questionId": "q113",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 1788
+ },
+ {
+ "questionId": "q113",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "4696",
+ "actual": "4696",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1050
+ },
+ {
+ "questionId": "q114",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 1414
+ },
+ {
+ "questionId": "q114",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1192
+ },
+ {
+ "questionId": "q114",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 893
+ },
+ {
+ "questionId": "q114",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 1065
+ },
+ {
+ "questionId": "q114",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 1155
+ },
+ {
+ "questionId": "q114",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 1842
+ },
+ {
+ "questionId": "q114",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 2740
+ },
+ {
+ "questionId": "q114",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1295
+ },
+ {
+ "questionId": "q114",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 1053
+ },
+ {
+ "questionId": "q114",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "4211.6",
+ "actual": "4211.6",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 1118
+ },
+ {
+ "questionId": "q115",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 1452
+ },
+ {
+ "questionId": "q115",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1272
+ },
+ {
+ "questionId": "q115",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 1039
+ },
+ {
+ "questionId": "q115",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1155
+ },
+ {
+ "questionId": "q115",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 796
+ },
+ {
+ "questionId": "q115",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1048
+ },
+ {
+ "questionId": "q115",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 2282
+ },
+ {
+ "questionId": "q115",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1592
+ },
+ {
+ "questionId": "q115",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 2691
+ },
+ {
+ "questionId": "q115",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "6196",
+ "actual": "6196",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1126
+ },
+ {
+ "questionId": "q116",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 1288
+ },
+ {
+ "questionId": "q116",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "6105.3",
+ "actual": "6105.30",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 991
+ },
+ {
+ "questionId": "q116",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 1257
+ },
+ {
+ "questionId": "q116",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 1004
+ },
+ {
+ "questionId": "q116",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 1620
+ },
+ {
+ "questionId": "q116",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 991
+ },
+ {
+ "questionId": "q116",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 1048
+ },
+ {
+ "questionId": "q116",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1189
+ },
+ {
+ "questionId": "q116",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 3282
+ },
+ {
+ "questionId": "q116",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "6105.3",
+ "actual": "6105.3",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 985
+ },
+ {
+ "questionId": "q117",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 871
+ },
+ {
+ "questionId": "q117",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1042
+ },
+ {
+ "questionId": "q117",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 999
+ },
+ {
+ "questionId": "q117",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1111
+ },
+ {
+ "questionId": "q117",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 1132
+ },
+ {
+ "questionId": "q117",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1004
+ },
+ {
+ "questionId": "q117",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 1162
+ },
+ {
+ "questionId": "q117",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1271
+ },
+ {
+ "questionId": "q117",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 961
+ },
+ {
+ "questionId": "q117",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "6528",
+ "actual": "6528",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1289
+ },
+ {
+ "questionId": "q118",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 1634
+ },
+ {
+ "questionId": "q118",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1198
+ },
+ {
+ "questionId": "q118",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 2678
+ },
+ {
+ "questionId": "q118",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 1155
+ },
+ {
+ "questionId": "q118",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 1104
+ },
+ {
+ "questionId": "q118",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 1109
+ },
+ {
+ "questionId": "q118",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 3756
+ },
+ {
+ "questionId": "q118",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1082
+ },
+ {
+ "questionId": "q118",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 1451
+ },
+ {
+ "questionId": "q118",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "1136.09",
+ "actual": "1136.09",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 1730
+ },
+ {
+ "questionId": "q119",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 1327
+ },
+ {
+ "questionId": "q119",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 1282
+ },
+ {
+ "questionId": "q119",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 1368
+ },
+ {
+ "questionId": "q119",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1487
+ },
+ {
+ "questionId": "q119",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 2752
+ },
+ {
+ "questionId": "q119",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 909
+ },
+ {
+ "questionId": "q119",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 3502
+ },
+ {
+ "questionId": "q119",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1212
+ },
+ {
+ "questionId": "q119",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 1218
+ },
+ {
+ "questionId": "q119",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "4689",
+ "actual": "4689",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1064
+ },
+ {
+ "questionId": "q120",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 2777
+ },
+ {
+ "questionId": "q120",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1246
+ },
+ {
+ "questionId": "q120",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 1424
+ },
+ {
+ "questionId": "q120",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 1074
+ },
+ {
+ "questionId": "q120",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 2803
+ },
+ {
+ "questionId": "q120",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 1107
+ },
+ {
+ "questionId": "q120",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 1066
+ },
+ {
+ "questionId": "q120",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1325
+ },
+ {
+ "questionId": "q120",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 1330
+ },
+ {
+ "questionId": "q120",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "2637.73",
+ "actual": "2637.73",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 1192
+ },
+ {
+ "questionId": "q121",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 3713,
+ "outputTokens": 3,
+ "latencyMs": 1139
+ },
+ {
+ "questionId": "q121",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 4080,
+ "outputTokens": 6,
+ "latencyMs": 994
+ },
+ {
+ "questionId": "q121",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 1564,
+ "outputTokens": 3,
+ "latencyMs": 1309
+ },
+ {
+ "questionId": "q121",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 1509,
+ "outputTokens": 6,
+ "latencyMs": 1184
+ },
+ {
+ "questionId": "q121",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 1442,
+ "outputTokens": 3,
+ "latencyMs": 1182
+ },
+ {
+ "questionId": "q121",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 1445,
+ "outputTokens": 6,
+ "latencyMs": 1381
+ },
+ {
+ "questionId": "q121",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 3830,
+ "outputTokens": 3,
+ "latencyMs": 1103
+ },
+ {
+ "questionId": "q121",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 3415,
+ "outputTokens": 6,
+ "latencyMs": 1220
+ },
+ {
+ "questionId": "q121",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 2986,
+ "outputTokens": 3,
+ "latencyMs": 1169
+ },
+ {
+ "questionId": "q121",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "5685",
+ "actual": "5685",
+ "correct": true,
+ "inputTokens": 3110,
+ "outputTokens": 6,
+ "latencyMs": 1208
+ },
+ {
+ "questionId": "q122",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 3712,
+ "outputTokens": 5,
+ "latencyMs": 1037
+ },
+ {
+ "questionId": "q122",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 4079,
+ "outputTokens": 8,
+ "latencyMs": 1278
+ },
+ {
+ "questionId": "q122",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 1563,
+ "outputTokens": 5,
+ "latencyMs": 1441
+ },
+ {
+ "questionId": "q122",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 1508,
+ "outputTokens": 8,
+ "latencyMs": 1204
+ },
+ {
+ "questionId": "q122",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 1441,
+ "outputTokens": 5,
+ "latencyMs": 1782
+ },
+ {
+ "questionId": "q122",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 1444,
+ "outputTokens": 8,
+ "latencyMs": 1088
+ },
+ {
+ "questionId": "q122",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 3829,
+ "outputTokens": 5,
+ "latencyMs": 1447
+ },
+ {
+ "questionId": "q122",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 3414,
+ "outputTokens": 8,
+ "latencyMs": 1356
+ },
+ {
+ "questionId": "q122",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 2985,
+ "outputTokens": 5,
+ "latencyMs": 1309
+ },
+ {
+ "questionId": "q122",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "3421.06",
+ "actual": "3421.06",
+ "correct": true,
+ "inputTokens": 3109,
+ "outputTokens": 8,
+ "latencyMs": 995
+ },
+ {
+ "questionId": "q123",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "344498",
+ "actual": "188,000",
+ "correct": false,
+ "inputTokens": 3710,
+ "outputTokens": 4,
+ "latencyMs": 1405
+ },
+ {
+ "questionId": "q123",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "344498",
+ "actual": "188,945",
+ "correct": false,
+ "inputTokens": 4077,
+ "outputTokens": 7,
+ "latencyMs": 1110
+ },
+ {
+ "questionId": "q123",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "344498",
+ "actual": "186,000",
+ "correct": false,
+ "inputTokens": 1561,
+ "outputTokens": 4,
+ "latencyMs": 1306
+ },
+ {
+ "questionId": "q123",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "344498",
+ "actual": "337,045",
+ "correct": false,
+ "inputTokens": 1506,
+ "outputTokens": 7,
+ "latencyMs": 1292
+ },
+ {
+ "questionId": "q123",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "344498",
+ "actual": "188,000",
+ "correct": false,
+ "inputTokens": 1439,
+ "outputTokens": 4,
+ "latencyMs": 2659
+ },
+ {
+ "questionId": "q123",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "344498",
+ "actual": "372,915",
+ "correct": false,
+ "inputTokens": 1442,
+ "outputTokens": 7,
+ "latencyMs": 966
+ },
+ {
+ "questionId": "q123",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "344498",
+ "actual": "174,000",
+ "correct": false,
+ "inputTokens": 3827,
+ "outputTokens": 4,
+ "latencyMs": 1177
+ },
+ {
+ "questionId": "q123",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "344498",
+ "actual": "188,647",
+ "correct": false,
+ "inputTokens": 3412,
+ "outputTokens": 7,
+ "latencyMs": 1018
+ },
+ {
+ "questionId": "q123",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "344498",
+ "actual": "188,000",
+ "correct": false,
+ "inputTokens": 2983,
+ "outputTokens": 4,
+ "latencyMs": 1659
+ },
+ {
+ "questionId": "q123",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "344498",
+ "actual": "181,854",
+ "correct": false,
+ "inputTokens": 3107,
+ "outputTokens": 7,
+ "latencyMs": 1894
+ },
+ {
+ "questionId": "q124",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "312818.50",
+ "actual": "188,174.36",
+ "correct": false,
+ "inputTokens": 3708,
+ "outputTokens": 6,
+ "latencyMs": 2900
+ },
+ {
+ "questionId": "q124",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "312818.50",
+ "actual": "287,745.89",
+ "correct": false,
+ "inputTokens": 4075,
+ "outputTokens": 9,
+ "latencyMs": 1196
+ },
+ {
+ "questionId": "q124",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "312818.50",
+ "actual": "Total revenue across all dates is 139,155.36.",
+ "correct": false,
+ "inputTokens": 1559,
+ "outputTokens": 14,
+ "latencyMs": 1401
+ },
+ {
+ "questionId": "q124",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "312818.50",
+ "actual": "487,891.45",
+ "correct": false,
+ "inputTokens": 1504,
+ "outputTokens": 9,
+ "latencyMs": 1118
+ },
+ {
+ "questionId": "q124",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "312818.50",
+ "actual": "Total revenue across all dates is 155,000.00.",
+ "correct": false,
+ "inputTokens": 1437,
+ "outputTokens": 14,
+ "latencyMs": 1308
+ },
+ {
+ "questionId": "q124",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "312818.50",
+ "actual": "487,891.89",
+ "correct": false,
+ "inputTokens": 1440,
+ "outputTokens": 9,
+ "latencyMs": 1120
+ },
+ {
+ "questionId": "q124",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "312818.50",
+ "actual": "Total revenue across all dates is 155,155.36.",
+ "correct": false,
+ "inputTokens": 3825,
+ "outputTokens": 14,
+ "latencyMs": 1143
+ },
+ {
+ "questionId": "q124",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "312818.50",
+ "actual": "381,968.89",
+ "correct": false,
+ "inputTokens": 3410,
+ "outputTokens": 9,
+ "latencyMs": 1172
+ },
+ {
+ "questionId": "q124",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "312818.50",
+ "actual": "Total revenue across all dates is 155,155.36.",
+ "correct": false,
+ "inputTokens": 2981,
+ "outputTokens": 14,
+ "latencyMs": 1179
+ },
+ {
+ "questionId": "q124",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "312818.50",
+ "actual": "381,847.89",
+ "correct": false,
+ "inputTokens": 3105,
+ "outputTokens": 9,
+ "latencyMs": 1073
+ },
+ {
+ "questionId": "q125",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "1811",
+ "actual": "1030",
+ "correct": false,
+ "inputTokens": 3710,
+ "outputTokens": 3,
+ "latencyMs": 3823
+ },
+ {
+ "questionId": "q125",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "1811",
+ "actual": "1,234",
+ "correct": false,
+ "inputTokens": 4078,
+ "outputTokens": 7,
+ "latencyMs": 1153
+ },
+ {
+ "questionId": "q125",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "1811",
+ "actual": "1040",
+ "correct": false,
+ "inputTokens": 1561,
+ "outputTokens": 3,
+ "latencyMs": 1472
+ },
+ {
+ "questionId": "q125",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "1811",
+ "actual": "1,945",
+ "correct": false,
+ "inputTokens": 1507,
+ "outputTokens": 7,
+ "latencyMs": 940
+ },
+ {
+ "questionId": "q125",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "1811",
+ "actual": "1030",
+ "correct": false,
+ "inputTokens": 1439,
+ "outputTokens": 3,
+ "latencyMs": 1067
+ },
+ {
+ "questionId": "q125",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "1811",
+ "actual": "1,945",
+ "correct": false,
+ "inputTokens": 1443,
+ "outputTokens": 7,
+ "latencyMs": 1183
+ },
+ {
+ "questionId": "q125",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "1811",
+ "actual": "Total conversions: 1030",
+ "correct": false,
+ "inputTokens": 3827,
+ "outputTokens": 7,
+ "latencyMs": 1103
+ },
+ {
+ "questionId": "q125",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "1811",
+ "actual": "1,454",
+ "correct": false,
+ "inputTokens": 3413,
+ "outputTokens": 7,
+ "latencyMs": 1067
+ },
+ {
+ "questionId": "q125",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "1811",
+ "actual": "1040",
+ "correct": false,
+ "inputTokens": 2983,
+ "outputTokens": 3,
+ "latencyMs": 932
+ },
+ {
+ "questionId": "q125",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "1811",
+ "actual": "1,454",
+ "correct": false,
+ "inputTokens": 3108,
+ "outputTokens": 7,
+ "latencyMs": 1530
+ },
+ {
+ "questionId": "q126",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "42",
+ "actual": "42",
+ "correct": true,
+ "inputTokens": 3710,
+ "outputTokens": 2,
+ "latencyMs": 1016
+ },
+ {
+ "questionId": "q126",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "42",
+ "actual": "42",
+ "correct": true,
+ "inputTokens": 4078,
+ "outputTokens": 5,
+ "latencyMs": 1440
+ },
+ {
+ "questionId": "q126",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "42",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 1561,
+ "outputTokens": 2,
+ "latencyMs": 1206
+ },
+ {
+ "questionId": "q126",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "42",
+ "actual": "42",
+ "correct": true,
+ "inputTokens": 1507,
+ "outputTokens": 5,
+ "latencyMs": 1452
+ },
+ {
+ "questionId": "q126",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "42",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 1439,
+ "outputTokens": 2,
+ "latencyMs": 1249
+ },
+ {
+ "questionId": "q126",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "42",
+ "actual": "42",
+ "correct": true,
+ "inputTokens": 1443,
+ "outputTokens": 5,
+ "latencyMs": 1248
+ },
+ {
+ "questionId": "q126",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "42",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 3827,
+ "outputTokens": 2,
+ "latencyMs": 1420
+ },
+ {
+ "questionId": "q126",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "42",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 3413,
+ "outputTokens": 5,
+ "latencyMs": 900
+ },
+ {
+ "questionId": "q126",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "42",
+ "actual": "42",
+ "correct": true,
+ "inputTokens": 2983,
+ "outputTokens": 2,
+ "latencyMs": 1309
+ },
+ {
+ "questionId": "q126",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "42",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 3108,
+ "outputTokens": 5,
+ "latencyMs": 1216
+ },
+ {
+ "questionId": "q127",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "28",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 3710,
+ "outputTokens": 2,
+ "latencyMs": 3911
+ },
+ {
+ "questionId": "q127",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "28",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 4078,
+ "outputTokens": 5,
+ "latencyMs": 1056
+ },
+ {
+ "questionId": "q127",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "28",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 1561,
+ "outputTokens": 2,
+ "latencyMs": 839
+ },
+ {
+ "questionId": "q127",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "28",
+ "actual": "26",
+ "correct": false,
+ "inputTokens": 1507,
+ "outputTokens": 5,
+ "latencyMs": 965
+ },
+ {
+ "questionId": "q127",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "28",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 1439,
+ "outputTokens": 2,
+ "latencyMs": 2163
+ },
+ {
+ "questionId": "q127",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "28",
+ "actual": "23",
+ "correct": false,
+ "inputTokens": 1443,
+ "outputTokens": 5,
+ "latencyMs": 1006
+ },
+ {
+ "questionId": "q127",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "28",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 3827,
+ "outputTokens": 2,
+ "latencyMs": 2619
+ },
+ {
+ "questionId": "q127",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "28",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 3413,
+ "outputTokens": 5,
+ "latencyMs": 989
+ },
+ {
+ "questionId": "q127",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "28",
+ "actual": "22",
+ "correct": false,
+ "inputTokens": 2983,
+ "outputTokens": 2,
+ "latencyMs": 1830
+ },
+ {
+ "questionId": "q127",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "28",
+ "actual": "23",
+ "correct": false,
+ "inputTokens": 3108,
+ "outputTokens": 5,
+ "latencyMs": 1001
+ },
+ {
+ "questionId": "q128",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 3710,
+ "outputTokens": 2,
+ "latencyMs": 1217
+ },
+ {
+ "questionId": "q128",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "11",
+ "correct": true,
+ "inputTokens": 4078,
+ "outputTokens": 5,
+ "latencyMs": 3180
+ },
+ {
+ "questionId": "q128",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 1561,
+ "outputTokens": 2,
+ "latencyMs": 1076
+ },
+ {
+ "questionId": "q128",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 1507,
+ "outputTokens": 5,
+ "latencyMs": 912
+ },
+ {
+ "questionId": "q128",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 1439,
+ "outputTokens": 2,
+ "latencyMs": 2900
+ },
+ {
+ "questionId": "q128",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "11",
+ "correct": true,
+ "inputTokens": 1443,
+ "outputTokens": 5,
+ "latencyMs": 1389
+ },
+ {
+ "questionId": "q128",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 3827,
+ "outputTokens": 2,
+ "latencyMs": 1107
+ },
+ {
+ "questionId": "q128",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "11",
+ "correct": true,
+ "inputTokens": 3413,
+ "outputTokens": 5,
+ "latencyMs": 1150
+ },
+ {
+ "questionId": "q128",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "11",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 2983,
+ "outputTokens": 2,
+ "latencyMs": 1047
+ },
+ {
+ "questionId": "q128",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "11",
+ "actual": "11",
+ "correct": true,
+ "inputTokens": 3108,
+ "outputTokens": 5,
+ "latencyMs": 1169
+ },
+ {
+ "questionId": "q129",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "58",
+ "actual": "36",
+ "correct": false,
+ "inputTokens": 3709,
+ "outputTokens": 2,
+ "latencyMs": 1007
+ },
+ {
+ "questionId": "q129",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "58",
+ "actual": "50",
+ "correct": false,
+ "inputTokens": 4078,
+ "outputTokens": 5,
+ "latencyMs": 1342
+ },
+ {
+ "questionId": "q129",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "58",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 1560,
+ "outputTokens": 2,
+ "latencyMs": 828
+ },
+ {
+ "questionId": "q129",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "58",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 1507,
+ "outputTokens": 5,
+ "latencyMs": 1305
+ },
+ {
+ "questionId": "q129",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "58",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 1438,
+ "outputTokens": 2,
+ "latencyMs": 1305
+ },
+ {
+ "questionId": "q129",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "58",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 1443,
+ "outputTokens": 5,
+ "latencyMs": 1406
+ },
+ {
+ "questionId": "q129",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "58",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 3826,
+ "outputTokens": 2,
+ "latencyMs": 1513
+ },
+ {
+ "questionId": "q129",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "58",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 3413,
+ "outputTokens": 5,
+ "latencyMs": 1026
+ },
+ {
+ "questionId": "q129",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "58",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 2982,
+ "outputTokens": 2,
+ "latencyMs": 1373
+ },
+ {
+ "questionId": "q129",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "58",
+ "actual": "54",
+ "correct": false,
+ "inputTokens": 3108,
+ "outputTokens": 5,
+ "latencyMs": 1112
+ },
+ {
+ "questionId": "q130",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 3709,
+ "outputTokens": 2,
+ "latencyMs": 1248
+ },
+ {
+ "questionId": "q130",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "31",
+ "correct": false,
+ "inputTokens": 4078,
+ "outputTokens": 5,
+ "latencyMs": 1083
+ },
+ {
+ "questionId": "q130",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "24",
+ "correct": false,
+ "inputTokens": 1560,
+ "outputTokens": 2,
+ "latencyMs": 895
+ },
+ {
+ "questionId": "q130",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 1507,
+ "outputTokens": 5,
+ "latencyMs": 1087
+ },
+ {
+ "questionId": "q130",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 1438,
+ "outputTokens": 2,
+ "latencyMs": 1157
+ },
+ {
+ "questionId": "q130",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 1443,
+ "outputTokens": 5,
+ "latencyMs": 1155
+ },
+ {
+ "questionId": "q130",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 3826,
+ "outputTokens": 2,
+ "latencyMs": 1959
+ },
+ {
+ "questionId": "q130",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "31",
+ "correct": false,
+ "inputTokens": 3413,
+ "outputTokens": 5,
+ "latencyMs": 1110
+ },
+ {
+ "questionId": "q130",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "41",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 2982,
+ "outputTokens": 2,
+ "latencyMs": 4540
+ },
+ {
+ "questionId": "q130",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "41",
+ "actual": "31",
+ "correct": false,
+ "inputTokens": 3108,
+ "outputTokens": 5,
+ "latencyMs": 1286
+ },
+ {
+ "questionId": "q131",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "23",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 3709,
+ "outputTokens": 2,
+ "latencyMs": 1059
+ },
+ {
+ "questionId": "q131",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "23",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 4078,
+ "outputTokens": 5,
+ "latencyMs": 1302
+ },
+ {
+ "questionId": "q131",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "23",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 1560,
+ "outputTokens": 2,
+ "latencyMs": 1019
+ },
+ {
+ "questionId": "q131",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "23",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 1507,
+ "outputTokens": 5,
+ "latencyMs": 975
+ },
+ {
+ "questionId": "q131",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "23",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 1438,
+ "outputTokens": 2,
+ "latencyMs": 1056
+ },
+ {
+ "questionId": "q131",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "23",
+ "actual": "20",
+ "correct": false,
+ "inputTokens": 1443,
+ "outputTokens": 5,
+ "latencyMs": 984
+ },
+ {
+ "questionId": "q131",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "23",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 3826,
+ "outputTokens": 2,
+ "latencyMs": 1420
+ },
+ {
+ "questionId": "q131",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "23",
+ "actual": "21",
+ "correct": false,
+ "inputTokens": 3413,
+ "outputTokens": 5,
+ "latencyMs": 1139
+ },
+ {
+ "questionId": "q131",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "23",
+ "actual": "18",
+ "correct": false,
+ "inputTokens": 2982,
+ "outputTokens": 2,
+ "latencyMs": 1097
+ },
+ {
+ "questionId": "q131",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "23",
+ "actual": "21",
+ "correct": false,
+ "inputTokens": 3108,
+ "outputTokens": 5,
+ "latencyMs": 1203
+ },
+ {
+ "questionId": "q132",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 15188,
+ "outputTokens": 3,
+ "latencyMs": 2257
+ },
+ {
+ "questionId": "q132",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 17409,
+ "outputTokens": 6,
+ "latencyMs": 1292
+ },
+ {
+ "questionId": "q132",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 8789,
+ "outputTokens": 3,
+ "latencyMs": 1877
+ },
+ {
+ "questionId": "q132",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 9279,
+ "outputTokens": 6,
+ "latencyMs": 1118
+ },
+ {
+ "questionId": "q132",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 8557,
+ "outputTokens": 3,
+ "latencyMs": 4023
+ },
+ {
+ "questionId": "q132",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 9125,
+ "outputTokens": 6,
+ "latencyMs": 1134
+ },
+ {
+ "questionId": "q132",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 15482,
+ "outputTokens": 3,
+ "latencyMs": 5304
+ },
+ {
+ "questionId": "q132",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 15367,
+ "outputTokens": 6,
+ "latencyMs": 1442
+ },
+ {
+ "questionId": "q132",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 13172,
+ "outputTokens": 3,
+ "latencyMs": 2157
+ },
+ {
+ "questionId": "q132",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "430828",
+ "actual": "430828",
+ "correct": true,
+ "inputTokens": 14483,
+ "outputTokens": 6,
+ "latencyMs": 1483
+ },
+ {
+ "questionId": "q133",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 15190,
+ "outputTokens": 3,
+ "latencyMs": 2084
+ },
+ {
+ "questionId": "q133",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 17410,
+ "outputTokens": 6,
+ "latencyMs": 2592
+ },
+ {
+ "questionId": "q133",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 8791,
+ "outputTokens": 3,
+ "latencyMs": 1208
+ },
+ {
+ "questionId": "q133",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 9280,
+ "outputTokens": 6,
+ "latencyMs": 1261
+ },
+ {
+ "questionId": "q133",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 8559,
+ "outputTokens": 3,
+ "latencyMs": 1697
+ },
+ {
+ "questionId": "q133",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 9126,
+ "outputTokens": 6,
+ "latencyMs": 1171
+ },
+ {
+ "questionId": "q133",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 15484,
+ "outputTokens": 3,
+ "latencyMs": 1704
+ },
+ {
+ "questionId": "q133",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 15368,
+ "outputTokens": 6,
+ "latencyMs": 1637
+ },
+ {
+ "questionId": "q133",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 13174,
+ "outputTokens": 3,
+ "latencyMs": 1599
+ },
+ {
+ "questionId": "q133",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "11798",
+ "actual": "11798",
+ "correct": true,
+ "inputTokens": 14484,
+ "outputTokens": 6,
+ "latencyMs": 1505
+ },
+ {
+ "questionId": "q134",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 15193,
+ "outputTokens": 3,
+ "latencyMs": 2340
+ },
+ {
+ "questionId": "q134",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 17412,
+ "outputTokens": 6,
+ "latencyMs": 1380
+ },
+ {
+ "questionId": "q134",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 8794,
+ "outputTokens": 3,
+ "latencyMs": 1631
+ },
+ {
+ "questionId": "q134",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 9282,
+ "outputTokens": 6,
+ "latencyMs": 1271
+ },
+ {
+ "questionId": "q134",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 8562,
+ "outputTokens": 3,
+ "latencyMs": 1620
+ },
+ {
+ "questionId": "q134",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 9128,
+ "outputTokens": 6,
+ "latencyMs": 1279
+ },
+ {
+ "questionId": "q134",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 15487,
+ "outputTokens": 3,
+ "latencyMs": 14565
+ },
+ {
+ "questionId": "q134",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 15370,
+ "outputTokens": 6,
+ "latencyMs": 1559
+ },
+ {
+ "questionId": "q134",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 13177,
+ "outputTokens": 3,
+ "latencyMs": 1600
+ },
+ {
+ "questionId": "q134",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "183631",
+ "actual": "183631",
+ "correct": true,
+ "inputTokens": 14486,
+ "outputTokens": 6,
+ "latencyMs": 1179
+ },
+ {
+ "questionId": "q135",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 15192,
+ "outputTokens": 3,
+ "latencyMs": 2508
+ },
+ {
+ "questionId": "q135",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 17412,
+ "outputTokens": 6,
+ "latencyMs": 1359
+ },
+ {
+ "questionId": "q135",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 8793,
+ "outputTokens": 3,
+ "latencyMs": 1188
+ },
+ {
+ "questionId": "q135",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 9282,
+ "outputTokens": 6,
+ "latencyMs": 1204
+ },
+ {
+ "questionId": "q135",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 8561,
+ "outputTokens": 3,
+ "latencyMs": 2448
+ },
+ {
+ "questionId": "q135",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 9128,
+ "outputTokens": 6,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q135",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 15486,
+ "outputTokens": 3,
+ "latencyMs": 2442
+ },
+ {
+ "questionId": "q135",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 15370,
+ "outputTokens": 6,
+ "latencyMs": 1414
+ },
+ {
+ "questionId": "q135",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 13176,
+ "outputTokens": 3,
+ "latencyMs": 2254
+ },
+ {
+ "questionId": "q135",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "29246",
+ "actual": "29246",
+ "correct": true,
+ "inputTokens": 14486,
+ "outputTokens": 6,
+ "latencyMs": 1512
+ },
+ {
+ "questionId": "q136",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 15188,
+ "outputTokens": 3,
+ "latencyMs": 1565
+ },
+ {
+ "questionId": "q136",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 17407,
+ "outputTokens": 6,
+ "latencyMs": 1871
+ },
+ {
+ "questionId": "q136",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 8789,
+ "outputTokens": 3,
+ "latencyMs": 1963
+ },
+ {
+ "questionId": "q136",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 9277,
+ "outputTokens": 6,
+ "latencyMs": 1533
+ },
+ {
+ "questionId": "q136",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 8557,
+ "outputTokens": 3,
+ "latencyMs": 1561
+ },
+ {
+ "questionId": "q136",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 9123,
+ "outputTokens": 6,
+ "latencyMs": 1200
+ },
+ {
+ "questionId": "q136",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 15482,
+ "outputTokens": 3,
+ "latencyMs": 1657
+ },
+ {
+ "questionId": "q136",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 15365,
+ "outputTokens": 6,
+ "latencyMs": 1582
+ },
+ {
+ "questionId": "q136",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 13172,
+ "outputTokens": 3,
+ "latencyMs": 3402
+ },
+ {
+ "questionId": "q136",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "135306",
+ "actual": "135306",
+ "correct": true,
+ "inputTokens": 14481,
+ "outputTokens": 6,
+ "latencyMs": 1251
+ },
+ {
+ "questionId": "q137",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "24914",
+ "actual": "24914",
+ "correct": true,
+ "inputTokens": 15187,
+ "outputTokens": 3,
+ "latencyMs": 2019
+ },
+ {
+ "questionId": "q137",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "24914",
+ "actual": "24914",
+ "correct": true,
+ "inputTokens": 17408,
+ "outputTokens": 6,
+ "latencyMs": 1517
+ },
+ {
+ "questionId": "q137",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "24914",
+ "actual": "The repository undefined/react-native does not exist in the provided data.",
+ "correct": false,
+ "inputTokens": 8788,
+ "outputTokens": 14,
+ "latencyMs": 1737
+ },
+ {
+ "questionId": "q137",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "24914",
+ "actual": "24914",
+ "correct": true,
+ "inputTokens": 9278,
+ "outputTokens": 6,
+ "latencyMs": 1467
+ },
+ {
+ "questionId": "q137",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "24914",
+ "actual": "24914",
+ "correct": true,
+ "inputTokens": 8556,
+ "outputTokens": 3,
+ "latencyMs": 3442
+ },
+ {
+ "questionId": "q137",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "24914",
+ "actual": "24914",
+ "correct": true,
+ "inputTokens": 9124,
+ "outputTokens": 6,
+ "latencyMs": 1300
+ },
+ {
+ "questionId": "q137",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "24914",
+ "actual": "24914",
+ "correct": true,
+ "inputTokens": 15481,
+ "outputTokens": 3,
+ "latencyMs": 1825
+ },
+ {
+ "questionId": "q137",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "24914",
+ "actual": "24914",
+ "correct": true,
+ "inputTokens": 15366,
+ "outputTokens": 6,
+ "latencyMs": 1443
+ },
+ {
+ "questionId": "q137",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "24914",
+ "actual": "124320",
+ "correct": false,
+ "inputTokens": 13171,
+ "outputTokens": 3,
+ "latencyMs": 1783
+ },
+ {
+ "questionId": "q137",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "24914",
+ "actual": "24914",
+ "correct": true,
+ "inputTokens": 14482,
+ "outputTokens": 6,
+ "latencyMs": 1362
+ },
+ {
+ "questionId": "q138",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 15187,
+ "outputTokens": 3,
+ "latencyMs": 1824
+ },
+ {
+ "questionId": "q138",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 17407,
+ "outputTokens": 6,
+ "latencyMs": 1479
+ },
+ {
+ "questionId": "q138",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "111683",
+ "actual": "108017",
+ "correct": false,
+ "inputTokens": 8788,
+ "outputTokens": 3,
+ "latencyMs": 3315
+ },
+ {
+ "questionId": "q138",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 9277,
+ "outputTokens": 6,
+ "latencyMs": 1270
+ },
+ {
+ "questionId": "q138",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 8556,
+ "outputTokens": 3,
+ "latencyMs": 1384
+ },
+ {
+ "questionId": "q138",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 9123,
+ "outputTokens": 6,
+ "latencyMs": 1252
+ },
+ {
+ "questionId": "q138",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 15481,
+ "outputTokens": 3,
+ "latencyMs": 3048
+ },
+ {
+ "questionId": "q138",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 15365,
+ "outputTokens": 6,
+ "latencyMs": 1381
+ },
+ {
+ "questionId": "q138",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 13171,
+ "outputTokens": 3,
+ "latencyMs": 3804
+ },
+ {
+ "questionId": "q138",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "111683",
+ "actual": "111683",
+ "correct": true,
+ "inputTokens": 14481,
+ "outputTokens": 6,
+ "latencyMs": 1498
+ },
+ {
+ "questionId": "q139",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 15194,
+ "outputTokens": 3,
+ "latencyMs": 1726
+ },
+ {
+ "questionId": "q139",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 17412,
+ "outputTokens": 6,
+ "latencyMs": 1526
+ },
+ {
+ "questionId": "q139",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 8795,
+ "outputTokens": 3,
+ "latencyMs": 1685
+ },
+ {
+ "questionId": "q139",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 9282,
+ "outputTokens": 6,
+ "latencyMs": 1140
+ },
+ {
+ "questionId": "q139",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "13364",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 8563,
+ "outputTokens": 2,
+ "latencyMs": 1933
+ },
+ {
+ "questionId": "q139",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 9128,
+ "outputTokens": 6,
+ "latencyMs": 1157
+ },
+ {
+ "questionId": "q139",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 15488,
+ "outputTokens": 3,
+ "latencyMs": 1249
+ },
+ {
+ "questionId": "q139",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 15370,
+ "outputTokens": 6,
+ "latencyMs": 1347
+ },
+ {
+ "questionId": "q139",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 13178,
+ "outputTokens": 3,
+ "latencyMs": 2174
+ },
+ {
+ "questionId": "q139",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "13364",
+ "actual": "13364",
+ "correct": true,
+ "inputTokens": 14486,
+ "outputTokens": 6,
+ "latencyMs": 1197
+ },
+ {
+ "questionId": "q140",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "98464",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 15186,
+ "outputTokens": 2,
+ "latencyMs": 3252
+ },
+ {
+ "questionId": "q140",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "98464",
+ "actual": "98464",
+ "correct": true,
+ "inputTokens": 17405,
+ "outputTokens": 6,
+ "latencyMs": 1667
+ },
+ {
+ "questionId": "q140",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "98464",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 8787,
+ "outputTokens": 2,
+ "latencyMs": 1192
+ },
+ {
+ "questionId": "q140",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "98464",
+ "actual": "98464",
+ "correct": true,
+ "inputTokens": 9275,
+ "outputTokens": 6,
+ "latencyMs": 1113
+ },
+ {
+ "questionId": "q140",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "98464",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 8555,
+ "outputTokens": 2,
+ "latencyMs": 2198
+ },
+ {
+ "questionId": "q140",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "98464",
+ "actual": "98464",
+ "correct": true,
+ "inputTokens": 9121,
+ "outputTokens": 6,
+ "latencyMs": 1187
+ },
+ {
+ "questionId": "q140",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "98464",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 15480,
+ "outputTokens": 2,
+ "latencyMs": 8573
+ },
+ {
+ "questionId": "q140",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "98464",
+ "actual": "98464",
+ "correct": true,
+ "inputTokens": 15363,
+ "outputTokens": 6,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q140",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "98464",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 13170,
+ "outputTokens": 2,
+ "latencyMs": 3471
+ },
+ {
+ "questionId": "q140",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "98464",
+ "actual": "98464",
+ "correct": true,
+ "inputTokens": 14479,
+ "outputTokens": 6,
+ "latencyMs": 1457
+ },
+ {
+ "questionId": "q141",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "6378",
+ "actual": "6378",
+ "correct": true,
+ "inputTokens": 15188,
+ "outputTokens": 3,
+ "latencyMs": 1363
+ },
+ {
+ "questionId": "q141",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "6378",
+ "actual": "6378",
+ "correct": true,
+ "inputTokens": 17408,
+ "outputTokens": 6,
+ "latencyMs": 1803
+ },
+ {
+ "questionId": "q141",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "6378",
+ "actual": "6378",
+ "correct": true,
+ "inputTokens": 8789,
+ "outputTokens": 3,
+ "latencyMs": 3696
+ },
+ {
+ "questionId": "q141",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "6378",
+ "actual": "6378",
+ "correct": true,
+ "inputTokens": 9278,
+ "outputTokens": 6,
+ "latencyMs": 1391
+ },
+ {
+ "questionId": "q141",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "6378",
+ "actual": "93731",
+ "correct": false,
+ "inputTokens": 8557,
+ "outputTokens": 3,
+ "latencyMs": 7861
+ },
+ {
+ "questionId": "q141",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "6378",
+ "actual": "6378",
+ "correct": true,
+ "inputTokens": 9124,
+ "outputTokens": 6,
+ "latencyMs": 1420
+ },
+ {
+ "questionId": "q141",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "6378",
+ "actual": "6378",
+ "correct": true,
+ "inputTokens": 15482,
+ "outputTokens": 3,
+ "latencyMs": 1769
+ },
+ {
+ "questionId": "q141",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "6378",
+ "actual": "6378",
+ "correct": true,
+ "inputTokens": 15366,
+ "outputTokens": 6,
+ "latencyMs": 1233
+ },
+ {
+ "questionId": "q141",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "6378",
+ "actual": "93731",
+ "correct": false,
+ "inputTokens": 13172,
+ "outputTokens": 3,
+ "latencyMs": 1831
+ },
+ {
+ "questionId": "q141",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "6378",
+ "actual": "6378",
+ "correct": true,
+ "inputTokens": 14482,
+ "outputTokens": 6,
+ "latencyMs": 1507
+ },
+ {
+ "questionId": "q142",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 15190,
+ "outputTokens": 3,
+ "latencyMs": 10752
+ },
+ {
+ "questionId": "q142",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 17409,
+ "outputTokens": 6,
+ "latencyMs": 1672
+ },
+ {
+ "questionId": "q142",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 8791,
+ "outputTokens": 3,
+ "latencyMs": 1788
+ },
+ {
+ "questionId": "q142",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 9279,
+ "outputTokens": 6,
+ "latencyMs": 1633
+ },
+ {
+ "questionId": "q142",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 8559,
+ "outputTokens": 3,
+ "latencyMs": 1365
+ },
+ {
+ "questionId": "q142",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 9125,
+ "outputTokens": 6,
+ "latencyMs": 1242
+ },
+ {
+ "questionId": "q142",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 15484,
+ "outputTokens": 3,
+ "latencyMs": 2237
+ },
+ {
+ "questionId": "q142",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 15367,
+ "outputTokens": 6,
+ "latencyMs": 1275
+ },
+ {
+ "questionId": "q142",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 13174,
+ "outputTokens": 3,
+ "latencyMs": 3028
+ },
+ {
+ "questionId": "q142",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "254916",
+ "actual": "254916",
+ "correct": true,
+ "inputTokens": 14483,
+ "outputTokens": 6,
+ "latencyMs": 1615
+ },
+ {
+ "questionId": "q143",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 15188,
+ "outputTokens": 3,
+ "latencyMs": 1972
+ },
+ {
+ "questionId": "q143",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 17410,
+ "outputTokens": 6,
+ "latencyMs": 2308
+ },
+ {
+ "questionId": "q143",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 8789,
+ "outputTokens": 3,
+ "latencyMs": 1361
+ },
+ {
+ "questionId": "q143",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 9280,
+ "outputTokens": 6,
+ "latencyMs": 1162
+ },
+ {
+ "questionId": "q143",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 8557,
+ "outputTokens": 3,
+ "latencyMs": 2196
+ },
+ {
+ "questionId": "q143",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 9126,
+ "outputTokens": 6,
+ "latencyMs": 1199
+ },
+ {
+ "questionId": "q143",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 15482,
+ "outputTokens": 3,
+ "latencyMs": 1758
+ },
+ {
+ "questionId": "q143",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 15368,
+ "outputTokens": 6,
+ "latencyMs": 1340
+ },
+ {
+ "questionId": "q143",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 13172,
+ "outputTokens": 3,
+ "latencyMs": 2122
+ },
+ {
+ "questionId": "q143",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "32413",
+ "actual": "32413",
+ "correct": true,
+ "inputTokens": 14484,
+ "outputTokens": 6,
+ "latencyMs": 1156
+ },
+ {
+ "questionId": "q144",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "240059",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 15186,
+ "outputTokens": 2,
+ "latencyMs": 1208
+ },
+ {
+ "questionId": "q144",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "240059",
+ "actual": "240059",
+ "correct": true,
+ "inputTokens": 17405,
+ "outputTokens": 6,
+ "latencyMs": 1826
+ },
+ {
+ "questionId": "q144",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "240059",
+ "actual": "undefined",
+ "correct": false,
+ "inputTokens": 8787,
+ "outputTokens": 2,
+ "latencyMs": 2224
+ },
+ {
+ "questionId": "q144",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "240059",
+ "actual": "240059",
+ "correct": true,
+ "inputTokens": 9275,
+ "outputTokens": 6,
+ "latencyMs": 1220
+ },
+ {
+ "questionId": "q144",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "240059",
+ "actual": "undefined",
+ "correct": false,
+ "inputTokens": 8555,
+ "outputTokens": 2,
+ "latencyMs": 1199
+ },
+ {
+ "questionId": "q144",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "240059",
+ "actual": "240059",
+ "correct": true,
+ "inputTokens": 9121,
+ "outputTokens": 6,
+ "latencyMs": 1264
+ },
+ {
+ "questionId": "q144",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "240059",
+ "actual": "undefined/react does not exist in the provided data.",
+ "correct": false,
+ "inputTokens": 15480,
+ "outputTokens": 11,
+ "latencyMs": 3072
+ },
+ {
+ "questionId": "q144",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "240059",
+ "actual": "240059",
+ "correct": true,
+ "inputTokens": 15363,
+ "outputTokens": 6,
+ "latencyMs": 1609
+ },
+ {
+ "questionId": "q144",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "240059",
+ "actual": "undefined/react does not exist in the provided data.",
+ "correct": false,
+ "inputTokens": 13170,
+ "outputTokens": 11,
+ "latencyMs": 2608
+ },
+ {
+ "questionId": "q144",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "240059",
+ "actual": "240059",
+ "correct": true,
+ "inputTokens": 14479,
+ "outputTokens": 6,
+ "latencyMs": 1237
+ },
+ {
+ "questionId": "q145",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "48986",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 15187,
+ "outputTokens": 2,
+ "latencyMs": 1906
+ },
+ {
+ "questionId": "q145",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "48986",
+ "actual": "48986",
+ "correct": true,
+ "inputTokens": 17406,
+ "outputTokens": 6,
+ "latencyMs": 1399
+ },
+ {
+ "questionId": "q145",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "48986",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 8788,
+ "outputTokens": 2,
+ "latencyMs": 2026
+ },
+ {
+ "questionId": "q145",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "48986",
+ "actual": "48986",
+ "correct": true,
+ "inputTokens": 9276,
+ "outputTokens": 6,
+ "latencyMs": 1318
+ },
+ {
+ "questionId": "q145",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "48986",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 8556,
+ "outputTokens": 2,
+ "latencyMs": 1605
+ },
+ {
+ "questionId": "q145",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "48986",
+ "actual": "48986",
+ "correct": true,
+ "inputTokens": 9122,
+ "outputTokens": 6,
+ "latencyMs": 1270
+ },
+ {
+ "questionId": "q145",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "48986",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 15481,
+ "outputTokens": 2,
+ "latencyMs": 5367
+ },
+ {
+ "questionId": "q145",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "48986",
+ "actual": "48986",
+ "correct": true,
+ "inputTokens": 15364,
+ "outputTokens": 6,
+ "latencyMs": 1204
+ },
+ {
+ "questionId": "q145",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "48986",
+ "actual": "The repository \"undefined/Python\" does not exist in the provided data.",
+ "correct": false,
+ "inputTokens": 13171,
+ "outputTokens": 16,
+ "latencyMs": 6329
+ },
+ {
+ "questionId": "q145",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "48986",
+ "actual": "48986",
+ "correct": true,
+ "inputTokens": 14480,
+ "outputTokens": 6,
+ "latencyMs": 1369
+ },
+ {
+ "questionId": "q146",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 15186,
+ "outputTokens": 3,
+ "latencyMs": 2063
+ },
+ {
+ "questionId": "q146",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 17405,
+ "outputTokens": 6,
+ "latencyMs": 1470
+ },
+ {
+ "questionId": "q146",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 8787,
+ "outputTokens": 3,
+ "latencyMs": 1386
+ },
+ {
+ "questionId": "q146",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 9275,
+ "outputTokens": 6,
+ "latencyMs": 1104
+ },
+ {
+ "questionId": "q146",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 8555,
+ "outputTokens": 3,
+ "latencyMs": 1747
+ },
+ {
+ "questionId": "q146",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 9121,
+ "outputTokens": 6,
+ "latencyMs": 1300
+ },
+ {
+ "questionId": "q146",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 15480,
+ "outputTokens": 3,
+ "latencyMs": 1443
+ },
+ {
+ "questionId": "q146",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 15363,
+ "outputTokens": 6,
+ "latencyMs": 1282
+ },
+ {
+ "questionId": "q146",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 13170,
+ "outputTokens": 3,
+ "latencyMs": 2185
+ },
+ {
+ "questionId": "q146",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "209624",
+ "actual": "209624",
+ "correct": true,
+ "inputTokens": 14479,
+ "outputTokens": 6,
+ "latencyMs": 1407
+ },
+ {
+ "questionId": "q147",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "58023",
+ "actual": "58023",
+ "correct": true,
+ "inputTokens": 15186,
+ "outputTokens": 3,
+ "latencyMs": 1743
+ },
+ {
+ "questionId": "q147",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "58023",
+ "actual": "58023",
+ "correct": true,
+ "inputTokens": 17406,
+ "outputTokens": 6,
+ "latencyMs": 1564
+ },
+ {
+ "questionId": "q147",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "58023",
+ "actual": "58023",
+ "correct": true,
+ "inputTokens": 8787,
+ "outputTokens": 3,
+ "latencyMs": 1317
+ },
+ {
+ "questionId": "q147",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "58023",
+ "actual": "58023",
+ "correct": true,
+ "inputTokens": 9276,
+ "outputTokens": 6,
+ "latencyMs": 1258
+ },
+ {
+ "questionId": "q147",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "58023",
+ "actual": "58023",
+ "correct": true,
+ "inputTokens": 8555,
+ "outputTokens": 3,
+ "latencyMs": 2419
+ },
+ {
+ "questionId": "q147",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "58023",
+ "actual": "58023",
+ "correct": true,
+ "inputTokens": 9122,
+ "outputTokens": 6,
+ "latencyMs": 1171
+ },
+ {
+ "questionId": "q147",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "58023",
+ "actual": "undefined/linux does not exist in the provided data.",
+ "correct": false,
+ "inputTokens": 15480,
+ "outputTokens": 11,
+ "latencyMs": 1680
+ },
+ {
+ "questionId": "q147",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "58023",
+ "actual": "58023",
+ "correct": true,
+ "inputTokens": 15364,
+ "outputTokens": 6,
+ "latencyMs": 1396
+ },
+ {
+ "questionId": "q147",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "58023",
+ "actual": "The repository \"undefined/linux\" does not exist in the provided data.",
+ "correct": false,
+ "inputTokens": 13170,
+ "outputTokens": 15,
+ "latencyMs": 1418
+ },
+ {
+ "questionId": "q147",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "58023",
+ "actual": "58023",
+ "correct": true,
+ "inputTokens": 14480,
+ "outputTokens": 6,
+ "latencyMs": 1399
+ },
+ {
+ "questionId": "q148",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 15189,
+ "outputTokens": 3,
+ "latencyMs": 1673
+ },
+ {
+ "questionId": "q148",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 17407,
+ "outputTokens": 6,
+ "latencyMs": 1736
+ },
+ {
+ "questionId": "q148",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 8790,
+ "outputTokens": 3,
+ "latencyMs": 1754
+ },
+ {
+ "questionId": "q148",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 9277,
+ "outputTokens": 6,
+ "latencyMs": 1317
+ },
+ {
+ "questionId": "q148",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "196024",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 8558,
+ "outputTokens": 2,
+ "latencyMs": 3219
+ },
+ {
+ "questionId": "q148",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 9123,
+ "outputTokens": 6,
+ "latencyMs": 1311
+ },
+ {
+ "questionId": "q148",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 15483,
+ "outputTokens": 3,
+ "latencyMs": 1346
+ },
+ {
+ "questionId": "q148",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 15365,
+ "outputTokens": 6,
+ "latencyMs": 1560
+ },
+ {
+ "questionId": "q148",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 13173,
+ "outputTokens": 3,
+ "latencyMs": 1009
+ },
+ {
+ "questionId": "q148",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "196024",
+ "actual": "196024",
+ "correct": true,
+ "inputTokens": 14481,
+ "outputTokens": 6,
+ "latencyMs": 1446
+ },
+ {
+ "questionId": "q149",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 15189,
+ "outputTokens": 3,
+ "latencyMs": 3361
+ },
+ {
+ "questionId": "q149",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 17408,
+ "outputTokens": 6,
+ "latencyMs": 1788
+ },
+ {
+ "questionId": "q149",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 8790,
+ "outputTokens": 3,
+ "latencyMs": 1123
+ },
+ {
+ "questionId": "q149",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 9278,
+ "outputTokens": 6,
+ "latencyMs": 1235
+ },
+ {
+ "questionId": "q149",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 8558,
+ "outputTokens": 3,
+ "latencyMs": 1100
+ },
+ {
+ "questionId": "q149",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 9124,
+ "outputTokens": 6,
+ "latencyMs": 1188
+ },
+ {
+ "questionId": "q149",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 15483,
+ "outputTokens": 3,
+ "latencyMs": 1557
+ },
+ {
+ "questionId": "q149",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 15366,
+ "outputTokens": 6,
+ "latencyMs": 1352
+ },
+ {
+ "questionId": "q149",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 13173,
+ "outputTokens": 3,
+ "latencyMs": 1280
+ },
+ {
+ "questionId": "q149",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "30919",
+ "actual": "30919",
+ "correct": true,
+ "inputTokens": 14482,
+ "outputTokens": 6,
+ "latencyMs": 1247
+ },
+ {
+ "questionId": "q150",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 15188,
+ "outputTokens": 3,
+ "latencyMs": 1394
+ },
+ {
+ "questionId": "q150",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 17405,
+ "outputTokens": 6,
+ "latencyMs": 1801
+ },
+ {
+ "questionId": "q150",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 8789,
+ "outputTokens": 3,
+ "latencyMs": 2052
+ },
+ {
+ "questionId": "q150",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 9275,
+ "outputTokens": 6,
+ "latencyMs": 1176
+ },
+ {
+ "questionId": "q150",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 8557,
+ "outputTokens": 3,
+ "latencyMs": 2084
+ },
+ {
+ "questionId": "q150",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 9121,
+ "outputTokens": 6,
+ "latencyMs": 1191
+ },
+ {
+ "questionId": "q150",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 15482,
+ "outputTokens": 3,
+ "latencyMs": 1261
+ },
+ {
+ "questionId": "q150",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 15363,
+ "outputTokens": 6,
+ "latencyMs": 1355
+ },
+ {
+ "questionId": "q150",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 13172,
+ "outputTokens": 3,
+ "latencyMs": 3388
+ },
+ {
+ "questionId": "q150",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "192220",
+ "actual": "192220",
+ "correct": true,
+ "inputTokens": 14479,
+ "outputTokens": 6,
+ "latencyMs": 1591
+ },
+ {
+ "questionId": "q151",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 15191,
+ "outputTokens": 3,
+ "latencyMs": 1942
+ },
+ {
+ "questionId": "q151",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 17414,
+ "outputTokens": 6,
+ "latencyMs": 1340
+ },
+ {
+ "questionId": "q151",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 8792,
+ "outputTokens": 3,
+ "latencyMs": 1443
+ },
+ {
+ "questionId": "q151",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 9284,
+ "outputTokens": 6,
+ "latencyMs": 1732
+ },
+ {
+ "questionId": "q151",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 8560,
+ "outputTokens": 3,
+ "latencyMs": 1994
+ },
+ {
+ "questionId": "q151",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 9130,
+ "outputTokens": 6,
+ "latencyMs": 1198
+ },
+ {
+ "questionId": "q151",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 15485,
+ "outputTokens": 3,
+ "latencyMs": 5013
+ },
+ {
+ "questionId": "q151",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 15372,
+ "outputTokens": 6,
+ "latencyMs": 1463
+ },
+ {
+ "questionId": "q151",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 13175,
+ "outputTokens": 3,
+ "latencyMs": 1296
+ },
+ {
+ "questionId": "q151",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "11763",
+ "actual": "11763",
+ "correct": true,
+ "inputTokens": 14488,
+ "outputTokens": 6,
+ "latencyMs": 2877
+ },
+ {
+ "questionId": "q152",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 15188,
+ "outputTokens": 2,
+ "latencyMs": 2160
+ },
+ {
+ "questionId": "q152",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 17406,
+ "outputTokens": 5,
+ "latencyMs": 1947
+ },
+ {
+ "questionId": "q152",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 8789,
+ "outputTokens": 2,
+ "latencyMs": 1222
+ },
+ {
+ "questionId": "q152",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 9276,
+ "outputTokens": 5,
+ "latencyMs": 1487
+ },
+ {
+ "questionId": "q152",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 8557,
+ "outputTokens": 2,
+ "latencyMs": 1450
+ },
+ {
+ "questionId": "q152",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 9122,
+ "outputTokens": 5,
+ "latencyMs": 1358
+ },
+ {
+ "questionId": "q152",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 15482,
+ "outputTokens": 2,
+ "latencyMs": 873
+ },
+ {
+ "questionId": "q152",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "100",
+ "correct": true,
+ "inputTokens": 15364,
+ "outputTokens": 5,
+ "latencyMs": 1500
+ },
+ {
+ "questionId": "q152",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 13172,
+ "outputTokens": 2,
+ "latencyMs": 7031
+ },
+ {
+ "questionId": "q152",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "0",
+ "correct": false,
+ "inputTokens": 14480,
+ "outputTokens": 5,
+ "latencyMs": 1916
+ },
+ {
+ "questionId": "q153",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "15404143",
+ "actual": "43115556",
+ "correct": false,
+ "inputTokens": 15189,
+ "outputTokens": 4,
+ "latencyMs": 3324
+ },
+ {
+ "questionId": "q153",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "15404143",
+ "actual": "13,847,892",
+ "correct": false,
+ "inputTokens": 17407,
+ "outputTokens": 9,
+ "latencyMs": 1607
+ },
+ {
+ "questionId": "q153",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "15404143",
+ "actual": "10419582",
+ "correct": false,
+ "inputTokens": 8790,
+ "outputTokens": 4,
+ "latencyMs": 900
+ },
+ {
+ "questionId": "q153",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "15404143",
+ "actual": "13,847,892",
+ "correct": false,
+ "inputTokens": 9277,
+ "outputTokens": 9,
+ "latencyMs": 1385
+ },
+ {
+ "questionId": "q153",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "15404143",
+ "actual": "10419582",
+ "correct": false,
+ "inputTokens": 8558,
+ "outputTokens": 4,
+ "latencyMs": 1922
+ },
+ {
+ "questionId": "q153",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "15404143",
+ "actual": "15,847,892",
+ "correct": false,
+ "inputTokens": 9123,
+ "outputTokens": 9,
+ "latencyMs": 1230
+ },
+ {
+ "questionId": "q153",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "15404143",
+ "actual": "10419580",
+ "correct": false,
+ "inputTokens": 15483,
+ "outputTokens": 4,
+ "latencyMs": 1716
+ },
+ {
+ "questionId": "q153",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "15404143",
+ "actual": "13,847,892",
+ "correct": false,
+ "inputTokens": 15365,
+ "outputTokens": 9,
+ "latencyMs": 1384
+ },
+ {
+ "questionId": "q153",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "15404143",
+ "actual": "Total number of stars across all repositories is 4,978,155.",
+ "correct": false,
+ "inputTokens": 13173,
+ "outputTokens": 16,
+ "latencyMs": 3411
+ },
+ {
+ "questionId": "q153",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "15404143",
+ "actual": "13,847,892",
+ "correct": false,
+ "inputTokens": 14481,
+ "outputTokens": 9,
+ "latencyMs": 1539
+ },
+ {
+ "questionId": "q154",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "77",
+ "correct": false,
+ "inputTokens": 15189,
+ "outputTokens": 2,
+ "latencyMs": 2523
+ },
+ {
+ "questionId": "q154",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "100",
+ "correct": true,
+ "inputTokens": 17408,
+ "outputTokens": 5,
+ "latencyMs": 1885
+ },
+ {
+ "questionId": "q154",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 8790,
+ "outputTokens": 2,
+ "latencyMs": 1148
+ },
+ {
+ "questionId": "q154",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "100",
+ "correct": true,
+ "inputTokens": 9278,
+ "outputTokens": 5,
+ "latencyMs": 1378
+ },
+ {
+ "questionId": "q154",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 8558,
+ "outputTokens": 2,
+ "latencyMs": 1364
+ },
+ {
+ "questionId": "q154",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "100",
+ "correct": true,
+ "inputTokens": 9124,
+ "outputTokens": 5,
+ "latencyMs": 1125
+ },
+ {
+ "questionId": "q154",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "104",
+ "correct": false,
+ "inputTokens": 15483,
+ "outputTokens": 2,
+ "latencyMs": 1276
+ },
+ {
+ "questionId": "q154",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "100",
+ "correct": true,
+ "inputTokens": 15366,
+ "outputTokens": 5,
+ "latencyMs": 1331
+ },
+ {
+ "questionId": "q154",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "77",
+ "correct": false,
+ "inputTokens": 13173,
+ "outputTokens": 2,
+ "latencyMs": 1534
+ },
+ {
+ "questionId": "q154",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "100",
+ "correct": true,
+ "inputTokens": 14482,
+ "outputTokens": 5,
+ "latencyMs": 1282
+ },
+ {
+ "questionId": "q155",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "19",
+ "correct": false,
+ "inputTokens": 15189,
+ "outputTokens": 2,
+ "latencyMs": 2206
+ },
+ {
+ "questionId": "q155",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "71",
+ "correct": false,
+ "inputTokens": 17408,
+ "outputTokens": 5,
+ "latencyMs": 1568
+ },
+ {
+ "questionId": "q155",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "15",
+ "correct": false,
+ "inputTokens": 8790,
+ "outputTokens": 2,
+ "latencyMs": 1478
+ },
+ {
+ "questionId": "q155",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 9278,
+ "outputTokens": 5,
+ "latencyMs": 1314
+ },
+ {
+ "questionId": "q155",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "12",
+ "correct": false,
+ "inputTokens": 8558,
+ "outputTokens": 2,
+ "latencyMs": 2149
+ },
+ {
+ "questionId": "q155",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 9124,
+ "outputTokens": 5,
+ "latencyMs": 1485
+ },
+ {
+ "questionId": "q155",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 15483,
+ "outputTokens": 2,
+ "latencyMs": 1043
+ },
+ {
+ "questionId": "q155",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "71",
+ "correct": false,
+ "inputTokens": 15366,
+ "outputTokens": 5,
+ "latencyMs": 1371
+ },
+ {
+ "questionId": "q155",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 13173,
+ "outputTokens": 2,
+ "latencyMs": 1693
+ },
+ {
+ "questionId": "q155",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "71",
+ "correct": false,
+ "inputTokens": 14482,
+ "outputTokens": 5,
+ "latencyMs": 1237
+ },
+ {
+ "questionId": "q156",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "76",
+ "actual": "82",
+ "correct": false,
+ "inputTokens": 15189,
+ "outputTokens": 2,
+ "latencyMs": 927
+ },
+ {
+ "questionId": "q156",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "76",
+ "actual": "100",
+ "correct": false,
+ "inputTokens": 17408,
+ "outputTokens": 5,
+ "latencyMs": 1274
+ },
+ {
+ "questionId": "q156",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "76",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 8790,
+ "outputTokens": 2,
+ "latencyMs": 2541
+ },
+ {
+ "questionId": "q156",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "76",
+ "actual": "100",
+ "correct": false,
+ "inputTokens": 9278,
+ "outputTokens": 5,
+ "latencyMs": 1116
+ },
+ {
+ "questionId": "q156",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "76",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 8558,
+ "outputTokens": 2,
+ "latencyMs": 997
+ },
+ {
+ "questionId": "q156",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "76",
+ "actual": "100",
+ "correct": false,
+ "inputTokens": 9124,
+ "outputTokens": 5,
+ "latencyMs": 1513
+ },
+ {
+ "questionId": "q156",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "76",
+ "actual": "104",
+ "correct": false,
+ "inputTokens": 15483,
+ "outputTokens": 2,
+ "latencyMs": 3168
+ },
+ {
+ "questionId": "q156",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "76",
+ "actual": "100",
+ "correct": false,
+ "inputTokens": 15366,
+ "outputTokens": 5,
+ "latencyMs": 1498
+ },
+ {
+ "questionId": "q156",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "76",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 13173,
+ "outputTokens": 2,
+ "latencyMs": 1600
+ },
+ {
+ "questionId": "q156",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "76",
+ "actual": "100",
+ "correct": false,
+ "inputTokens": 14482,
+ "outputTokens": 5,
+ "latencyMs": 1519
+ },
+ {
+ "questionId": "q157",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "77",
+ "correct": false,
+ "inputTokens": 15189,
+ "outputTokens": 2,
+ "latencyMs": 1809
+ },
+ {
+ "questionId": "q157",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "89",
+ "correct": false,
+ "inputTokens": 17409,
+ "outputTokens": 5,
+ "latencyMs": 1409
+ },
+ {
+ "questionId": "q157",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 8790,
+ "outputTokens": 2,
+ "latencyMs": 1367
+ },
+ {
+ "questionId": "q157",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "73",
+ "correct": false,
+ "inputTokens": 9279,
+ "outputTokens": 5,
+ "latencyMs": 1296
+ },
+ {
+ "questionId": "q157",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 8558,
+ "outputTokens": 2,
+ "latencyMs": 1162
+ },
+ {
+ "questionId": "q157",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "89",
+ "correct": false,
+ "inputTokens": 9125,
+ "outputTokens": 5,
+ "latencyMs": 1435
+ },
+ {
+ "questionId": "q157",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "77",
+ "correct": false,
+ "inputTokens": 15483,
+ "outputTokens": 2,
+ "latencyMs": 1774
+ },
+ {
+ "questionId": "q157",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "95",
+ "correct": false,
+ "inputTokens": 15367,
+ "outputTokens": 5,
+ "latencyMs": 1479
+ },
+ {
+ "questionId": "q157",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "100",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 13173,
+ "outputTokens": 2,
+ "latencyMs": 2710
+ },
+ {
+ "questionId": "q157",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "100",
+ "actual": "95",
+ "correct": false,
+ "inputTokens": 14483,
+ "outputTokens": 5,
+ "latencyMs": 1272
+ },
+ {
+ "questionId": "q158",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "95",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 15189,
+ "outputTokens": 2,
+ "latencyMs": 3038
+ },
+ {
+ "questionId": "q158",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "95",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 17409,
+ "outputTokens": 5,
+ "latencyMs": 1562
+ },
+ {
+ "questionId": "q158",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "95",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 8790,
+ "outputTokens": 2,
+ "latencyMs": 1536
+ },
+ {
+ "questionId": "q158",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "95",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 9279,
+ "outputTokens": 5,
+ "latencyMs": 1216
+ },
+ {
+ "questionId": "q158",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "95",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 8558,
+ "outputTokens": 2,
+ "latencyMs": 1760
+ },
+ {
+ "questionId": "q158",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "95",
+ "actual": "42",
+ "correct": false,
+ "inputTokens": 9125,
+ "outputTokens": 5,
+ "latencyMs": 1255
+ },
+ {
+ "questionId": "q158",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "95",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 15483,
+ "outputTokens": 2,
+ "latencyMs": 1683
+ },
+ {
+ "questionId": "q158",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "95",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 15367,
+ "outputTokens": 5,
+ "latencyMs": 2256
+ },
+ {
+ "questionId": "q158",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "95",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 13173,
+ "outputTokens": 2,
+ "latencyMs": 2831
+ },
+ {
+ "questionId": "q158",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "95",
+ "actual": "47",
+ "correct": false,
+ "inputTokens": 14483,
+ "outputTokens": 5,
+ "latencyMs": 1980
+ },
+ {
+ "questionId": "q159",
+ "format": "json",
+ "model": "gpt-4o-mini",
+ "expected": "83",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 15189,
+ "outputTokens": 2,
+ "latencyMs": 1327
+ },
+ {
+ "questionId": "q159",
+ "format": "json",
+ "model": "claude-haiku-4-5",
+ "expected": "83",
+ "actual": "71",
+ "correct": false,
+ "inputTokens": 17409,
+ "outputTokens": 5,
+ "latencyMs": 1894
+ },
+ {
+ "questionId": "q159",
+ "format": "toon",
+ "model": "gpt-4o-mini",
+ "expected": "83",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 8790,
+ "outputTokens": 2,
+ "latencyMs": 784
+ },
+ {
+ "questionId": "q159",
+ "format": "toon",
+ "model": "claude-haiku-4-5",
+ "expected": "83",
+ "actual": "73",
+ "correct": false,
+ "inputTokens": 9279,
+ "outputTokens": 5,
+ "latencyMs": 1422
+ },
+ {
+ "questionId": "q159",
+ "format": "csv",
+ "model": "gpt-4o-mini",
+ "expected": "83",
+ "actual": "34",
+ "correct": false,
+ "inputTokens": 8558,
+ "outputTokens": 2,
+ "latencyMs": 2644
+ },
+ {
+ "questionId": "q159",
+ "format": "csv",
+ "model": "claude-haiku-4-5",
+ "expected": "83",
+ "actual": "73",
+ "correct": false,
+ "inputTokens": 9125,
+ "outputTokens": 5,
+ "latencyMs": 1109
+ },
+ {
+ "questionId": "q159",
+ "format": "markdown-kv",
+ "model": "gpt-4o-mini",
+ "expected": "83",
+ "actual": "66",
+ "correct": false,
+ "inputTokens": 15483,
+ "outputTokens": 2,
+ "latencyMs": 1826
+ },
+ {
+ "questionId": "q159",
+ "format": "markdown-kv",
+ "model": "claude-haiku-4-5",
+ "expected": "83",
+ "actual": "71",
+ "correct": false,
+ "inputTokens": 15367,
+ "outputTokens": 5,
+ "latencyMs": 1342
+ },
+ {
+ "questionId": "q159",
+ "format": "yaml",
+ "model": "gpt-4o-mini",
+ "expected": "83",
+ "actual": "38",
+ "correct": false,
+ "inputTokens": 13173,
+ "outputTokens": 2,
+ "latencyMs": 2055
+ },
+ {
+ "questionId": "q159",
+ "format": "yaml",
+ "model": "claude-haiku-4-5",
+ "expected": "83",
+ "actual": "71",
+ "correct": false,
+ "inputTokens": 14483,
+ "outputTokens": 5,
+ "latencyMs": 1537
+ }
+]
\ No newline at end of file
diff --git a/benchmarks/results/accuracy/report.md b/benchmarks/results/accuracy/report.md
new file mode 100644
index 0000000..9991de9
--- /dev/null
+++ b/benchmarks/results/accuracy/report.md
@@ -0,0 +1,96 @@
+### Retrieval Accuracy
+
+Tested across **2 LLMs** with data retrieval tasks:
+
+```
+gpt-4o-mini      ██████████████░░░░░░ 72.3% accuracy
+claude-haiku-4-5 ███████████████░░░░░ 76.7% accuracy
+```
+
+**TOON achieves 73.9% accuracy (vs JSON's 73.6%) while using 46.3% fewer tokens.**
+
+| Format | Accuracy | Average Tokens |
+| ------ | -------- | -------------- |
+| `toon` | 73.9% | 4,678 |
+| `json` | 73.6% | 8,713 |
+| `markdown-kv` | 73.6% | 8,649 |
+| `csv` | 72.3% | 4,745 |
+| `yaml` | 71.7% | 7,091 |
+
+<details>
+<summary><strong>View detailed breakdown by dataset and model</strong></summary>
+
+#### Performance by Dataset
+
+##### Uniform employee records (TOON optimal format)
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 72.4% | 2,483 | 84/116 |
+| `csv` | 69.0% | 2,337 | 80/116 |
+| `yaml` | 68.1% | 4,969 | 79/116 |
+| `markdown-kv` | 68.1% | 6,270 | 79/116 |
+| `json` | 68.1% | 6,347 | 79/116 |
+
+##### E-commerce orders with nested structures
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 84.1% | 5,967 | 74/88 |
+| `csv` | 83.0% | 6,735 | 73/88 |
+| `yaml` | 81.8% | 7,328 | 72/88 |
+| `markdown-kv` | 86.4% | 9,110 | 76/88 |
+| `json` | 84.1% | 9,694 | 74/88 |
+
+##### Time-series analytics data
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `csv` | 72.4% | 1,393 | 42/58 |
+| `toon` | 70.7% | 1,515 | 41/58 |
+| `yaml` | 72.4% | 2,938 | 42/58 |
+| `json` | 74.1% | 3,665 | 43/58 |
+| `markdown-kv` | 70.7% | 3,779 | 41/58 |
+
+##### Popular GitHub repositories
+
+| Format | Accuracy | Tokens | Correct/Total |
+|--------|----------|--------|---------------|
+| `toon` | 64.3% | 8,745 | 36/56 |
+| `csv` | 62.5% | 8,513 | 35/56 |
+| `json` | 67.9% | 15,145 | 38/56 |
+| `markdown-kv` | 67.9% | 15,436 | 38/56 |
+| `yaml` | 62.5% | 13,129 | 35/56 |
+
+
+#### Performance by Model
+
+##### gpt-4o-mini
+
+| Format | Accuracy | Correct/Total |
+|--------|----------|---------------|
+| `toon` | 72.3% | 115/159 |
+| `json` | 71.7% | 114/159 |
+| `markdown-kv` | 70.4% | 112/159 |
+| `csv` | 69.2% | 110/159 |
+| `yaml` | 68.6% | 109/159 |
+
+##### claude-haiku-4-5
+
+| Format | Accuracy | Correct/Total |
+|--------|----------|---------------|
+| `markdown-kv` | 76.7% | 122/159 |
+| `toon` | 75.5% | 120/159 |
+| `json` | 75.5% | 120/159 |
+| `csv` | 75.5% | 120/159 |
+| `yaml` | 74.8% | 119/159 |
+
+
+#### Methodology
+
+- **Semantic validation**: LLM-as-judge validates responses semantically (not exact string matching).
+- **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding.
+- **Question types**: Field retrieval, aggregation, and filtering tasks.
+- **Real data**: Faker.js-generated datasets + GitHub repositories.
+
+</details>
diff --git a/benchmarks/results/accuracy/summary.json b/benchmarks/results/accuracy/summary.json
new file mode 100644
index 0000000..b5dddc2
--- /dev/null
+++ b/benchmarks/results/accuracy/summary.json
@@ -0,0 +1,95 @@
+{
+ "formatResults": [
+ {
+ "format": "toon",
+ "accuracy": 0.7389937106918238,
+ "totalTokens": 4678,
+ "avgInputTokens": 4675,
+ "avgLatency": 1424,
+ "correctCount": 235,
+ "totalCount": 318
+ },
+ {
+ "format": "json",
+ "accuracy": 0.7358490566037735,
+ "totalTokens": 8713,
+ "avgInputTokens": 9177,
+ "avgLatency": 1678,
+ "correctCount": 234,
+ "totalCount": 318
+ },
+ {
+ "format": "markdown-kv",
+ "accuracy": 0.7358490566037735,
+ "totalTokens": 8649,
+ "avgInputTokens": 8242,
+ "avgLatency": 1724,
+ "correctCount": 234,
+ "totalCount": 318
+ },
+ {
+ "format": "csv",
+ "accuracy": 0.7232704402515723,
+ "totalTokens": 4745,
+ "avgInputTokens": 4878,
+ "avgLatency": 1573,
+ "correctCount": 230,
+ "totalCount": 318
+ },
+ {
+ "format": "yaml",
+ "accuracy": 0.7169811320754716,
+ "totalTokens": 7091,
+ "avgInputTokens": 7136,
+ "avgLatency": 1602,
+ "correctCount": 228,
+ "totalCount": 318
+ }
+ ],
+ "questions": 159,
+ "models": [
+ "gpt-4o-mini",
+ "claude-haiku-4-5"
+ ],
+ "datasets": [
+ {
+ "name": "tabular",
+ "description": "Uniform employee records (TOON optimal format)"
+ },
+ {
+ "name": "nested",
+ "description": "E-commerce orders with nested structures"
+ },
+ {
+ "name": "analytics",
+ "description": "Time-series analytics data"
+ },
+ {
+ "name": "github",
+ "description": "Popular GitHub repositories"
+ }
+ ],
+ "tokenCounts": {
+ "json-tabular": 6347,
+ "json-nested": 9694,
+ "json-analytics": 3665,
+ "json-github": 15145,
+ "toon-tabular": 2483,
+ "toon-nested": 5967,
+ "toon-analytics": 1515,
+ "toon-github": 8745,
+ "csv-tabular": 2337,
+ "csv-nested": 6735,
+ "csv-analytics": 1393,
+ "csv-github": 8513,
+ "markdown-kv-tabular": 6270,
+ "markdown-kv-nested": 9110,
+ "markdown-kv-analytics": 3779,
+ "markdown-kv-github": 15436,
+ "yaml-tabular": 4969,
+ "yaml-nested": 7328,
+ "yaml-analytics": 2938,
+ "yaml-github": 13129
+ },
+ "timestamp": "2025-10-27T10:46:35.127Z"
+}
\ No newline at end of file
diff --git a/benchmarks/results/token-efficiency.md b/benchmarks/results/token-efficiency.md
new file mode 100644
index 0000000..090397a
--- /dev/null
+++ b/benchmarks/results/token-efficiency.md
@@ -0,0 +1,141 @@
+### Token Efficiency
+
+```
+⭐ GitHub Repositories       ██████████████░░░░░░░░░░░  8,745 tokens (JSON: 15,145) 💰 42.3% saved
+📈 Analytics Time Series     ██████████░░░░░░░░░░░░░░░  3,631 tokens (JSON:  9,024) 💰 59.8% saved
+👥 API Response              ██████████████░░░░░░░░░░░  2,593 tokens (JSON:  4,589) 💰 43.5% saved
+🛒 E-commerce Order          ███████████████░░░░░░░░░░    203 tokens (JSON:    338) 💰 39.9% saved
+```
+
+**Total:** 15,172 tokens (TOON) vs 29,096 tokens (JSON) → 47.9% savings
+
+<details>
+<summary><strong>View detailed examples</strong></summary>
+
+#### ⭐ GitHub Repositories
+
+**Configuration:** Top 100 GitHub repositories with stars, forks, and metadata
+
+**Savings:** 6,400 tokens (42.3% reduction)
+
+**JSON** (15,145 tokens):
+
+```json
+{
+ "repositories": [
+ {
+ "id": 28457823,
+ "name": "freeCodeCamp",
+ "repo": "freeCodeCamp/freeCodeCamp",
+ "description": "freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,...",
+ "createdAt": "2014-12-24T17:49:19Z",
+ "updatedAt": "2025-10-27T07:40:58Z",
+ "pushedAt": "2025-10-26T11:31:08Z",
+ "stars": 430828,
+ "watchers": 8582,
+ "forks": 42136,
+ "defaultBranch": "main"
+ },
+ {
+ "id": 132750724,
+ "name": "build-your-own-x",
+ "repo": "codecrafters-io/build-your-own-x",
+ "description": "Master programming by recreating your favorite technologies from scratch.",
+ "createdAt": "2018-05-09T12:03:18Z",
+ "updatedAt": "2025-10-27T07:43:25Z",
+ "pushedAt": "2025-10-10T18:45:01Z",
+ "stars": 430102,
+ "watchers": 6322,
+ "forks": 40388,
+ "defaultBranch": "master"
+ },
+ {
+ "id": 21737465,
+ "name": "awesome",
+ "repo": "sindresorhus/awesome",
+ "description": "😎 Awesome lists about all kinds of interesting topics",
+ "createdAt": "2014-07-11T13:42:37Z",
+ "updatedAt": "2025-10-27T07:44:27Z",
+ "pushedAt": "2025-10-23T17:26:53Z",
+ "stars": 409760,
+ "watchers": 8016,
+ "forks": 32015,
+ "defaultBranch": "main"
+ }
+ ]
+}
+```
+
+**TOON** (8,745 tokens):
+
+```
+repositories[3]{id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch}:
+ 28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,...","2014-12-24T17:49:19Z","2025-10-27T07:40:58Z","2025-10-26T11:31:08Z",430828,8582,42136,main
+ 132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-27T07:43:25Z","2025-10-10T18:45:01Z",430102,6322,40388,master
+ 21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-27T07:44:27Z","2025-10-23T17:26:53Z",409760,8016,32015,main
+```
+
+---
+
+#### 📈 Analytics Time Series
+
+**Configuration:** 180 days of web metrics (views, clicks, conversions, revenue)
+
+**Savings:** 5,393 tokens (59.8% reduction)
+
+**JSON** (9,024 tokens):
+
+```json
+{
+ "metrics": [
+ {
+ "date": "2024-12-31",
+ "views": 3769,
+ "clicks": 400,
+ "conversions": 59,
+ "revenue": 198.98
+ },
+ {
+ "date": "2025-01-01",
+ "views": 5742,
+ "clicks": 463,
+ "conversions": 28,
+ "revenue": 295.77
+ },
+ {
+ "date": "2025-01-02",
+ "views": 3669,
+ "clicks": 336,
+ "conversions": 102,
+ "revenue": 624.23
+ },
+ {
+ "date": "2025-01-03",
+ "views": 1332,
+ "clicks": 304,
+ "conversions": 99,
+ "revenue": 113.06
+ },
+ {
+ "date": "2025-01-04",
+ "views": 1444,
+ "clicks": 222,
+ "conversions": 88,
+ "revenue": 986.69
+ }
+ ]
+}
+```
+
+**TOON** (3,631 tokens):
+
+```
+metrics[5]{date,views,clicks,conversions,revenue}:
+ 2024-12-31,3769,400,59,198.98
+ 2025-01-01,5742,463,28,295.77
+ 2025-01-02,3669,336,102,624.23
+ 2025-01-03,1332,304,99,113.06
+ 2025-01-04,1444,222,88,986.69
+```
+
+</details>
diff --git a/benchmarks/scripts/accuracy-benchmark.ts b/benchmarks/scripts/accuracy-benchmark.ts
new file mode 100644
index 0000000..9867e5c
--- /dev/null
+++ b/benchmarks/scripts/accuracy-benchmark.ts
@@ -0,0 +1,140 @@
+/**
+ * TOON LLM Accuracy Benchmark
+ *
+ * Main entry point that orchestrates the full benchmark:
+ * 1. Generate questions from datasets
+ * 2. Format data in all formats (JSON, TOON, CSV, YAML, Markdown-kv)
+ * 3. Evaluate each question with each format using LLMs
+ * 4. Generate reports
+ */
+
+import type { EvaluationResult, Question } from '../src/types'
+import * as fsp from 'node:fs/promises'
+import * as path from 'node:path'
+import { consola } from 'consola'
+import pMap from 'p-map'
+import { BENCHMARKS_DIR, DEFAULT_CONCURRENCY, DRY_RUN, DRY_RUN_LIMITS, ROOT_DIR } from '../src/constants'
+import { datasets } from '../src/datasets'
+import { evaluateQuestion, models } from '../src/evaluate'
+import { formatters } from '../src/formatters'
+import { generateQuestions } from '../src/questions'
+import { calculateFormatResults, calculateTokenCounts, saveResults } from '../src/report'
+
+consola.start('LLM Accuracy Benchmark for TOON')
+
+// Reuse previously saved results when both result files can be read and parsed
+const resultsDir = path.join(BENCHMARKS_DIR, 'results', 'accuracy')
+const rawResultsPath = path.join(resultsDir, 'raw-results.json')
+const summaryPath = path.join(resultsDir, 'summary.json')
+
+let existingResults: EvaluationResult[] | undefined
+let existingTokenCounts: Record<string, number> | undefined
+
+try {
+  const [rawData, summaryData] = await Promise.all([
+    fsp.readFile(rawResultsPath, 'utf-8'),
+    fsp.readFile(summaryPath, 'utf-8'),
+  ])
+  const [parsedResults, summary] = [rawData, summaryData].map(text => JSON.parse(text)) // parse both before assigning either
+  existingResults = parsedResults
+  existingTokenCounts = summary.tokenCounts
+  consola.info('Found existing results — regenerating report only')
+}
+catch {
+  // Missing or unreadable result files: fall through to a full evaluation run
+}
+
+if (DRY_RUN) {
+ consola.info('Limiting questions and models for dry run')
+}
+
+let questions = generateQuestions()
+
+// In dry-run mode, keep only the first `maxQuestions` questions (a falsy limit disables the cap)
+if (DRY_RUN && DRY_RUN_LIMITS.maxQuestions) {
+ questions = questions.slice(0, DRY_RUN_LIMITS.maxQuestions)
+}
+
+// In dry-run mode, restrict to `allowedModels`; an empty allow-list keeps every model
+const activeModels = DRY_RUN && DRY_RUN_LIMITS.allowedModels.length > 0
+ ? Object.fromEntries(
+ Object.entries(models).filter(([name]) => DRY_RUN_LIMITS.allowedModels.includes(name)),
+ )
+ : models
+
+let results: EvaluationResult[]
+let tokenCounts: Record<string, number>
+
+if (existingResults && existingTokenCounts) {
+  // Reuse existing results
+  results = existingResults
+  tokenCounts = existingTokenCounts
+}
+else {
+  // Run full evaluation
+  consola.info(`Evaluating ${questions.length} questions`)
+  consola.info(`Testing ${Object.keys(formatters).length} formats`)
+  consola.info(`Using ${Object.keys(activeModels).length} models: ${Object.keys(activeModels).join(', ')}`)
+
+  // Calculate token counts for all format+dataset combinations
+  tokenCounts = calculateTokenCounts(formatters)
+
+  // Format datasets once (reuse for all questions), keyed by format name, then dataset name
+  const formattedDatasets: Record<string, Record<string, string>> = {}
+  for (const [formatName, formatter] of Object.entries(formatters)) {
+    formattedDatasets[formatName] = {}
+    for (const dataset of datasets) {
+      const formatted = formatter(dataset.data)
+      formattedDatasets[formatName]![dataset.name] = formatted
+    }
+  }
+
+  // Generate evaluation tasks: one per question x format x model
+  const tasks: { question: Question, formatName: string, modelName: string }[] = []
+  for (const question of questions) {
+    for (const [formatName] of Object.entries(formatters)) {
+      for (const [modelName] of Object.entries(activeModels)) {
+        tasks.push({ question, formatName, modelName })
+      }
+    }
+  }
+
+  const total = tasks.length
+  let completed = 0 // evaluations finished so far; tasks may finish out of order under concurrency
+  consola.start(`Running ${total} evaluations with concurrency: ${DEFAULT_CONCURRENCY}`)
+
+  // Evaluate all tasks in parallel
+  results = await pMap(
+    tasks,
+    async (task) => {
+      const formattedData = formattedDatasets[task.formatName]![task.question.dataset]!
+      const model = activeModels[task.modelName as keyof typeof activeModels]
+
+      const result = await evaluateQuestion(
+        task.question,
+        task.formatName,
+        formattedData,
+        model,
+        task.modelName,
+      )
+
+      // Progress update, driven by completions rather than the task's position in the input
+      completed++
+      if (completed % 10 === 0) {
+        console.log(`⏳ Progress: ${completed}/${total} (${((completed / total) * 100).toFixed(1)}%)`)
+      }
+
+      return result
+    },
+    { concurrency: DEFAULT_CONCURRENCY },
+  )
+
+  consola.success('Evaluation complete!')
+}
+
+// Aggregate per-format results and hand them to saveResults (runs for fresh and reused results alike)
+const formatResults = calculateFormatResults(results, tokenCounts)
+await saveResults(results, formatResults, questions, tokenCounts)
+
+consola.info(`Results saved to: \`${path.relative(ROOT_DIR, resultsDir)}\``)
+consola.success(existingResults ? 'Markdown report regenerated!' : 'Evaluation complete!')
diff --git a/benchmarks/scripts/fetch-github-data.ts b/benchmarks/scripts/fetch-github-data.ts
new file mode 100644
index 0000000..335dd77
--- /dev/null
+++ b/benchmarks/scripts/fetch-github-data.ts
@@ -0,0 +1,78 @@
+import * as fsp from 'node:fs/promises'
+import * as path from 'node:path'
+import process from 'node:process'
+import { consola } from 'consola'
+import { ofetch } from 'ofetch'
+import { BENCHMARKS_DIR } from '../src/constants'
+
+try {
+  // Fetch top 100 repos from GitHub, then load each repository's details
+  const repoList = await searchTop100Repos()
+  const repos = await fetchRepoDetails(repoList)
+
+  if (repos.length === 0) {
+    consola.error('❌ No repositories fetched. Exiting.')
+    process.exit(1)
+  }
+
+  // Sort by stars descending
+  repos.sort((a, b) => b.stars - a.stars)
+
+  await saveRepos(repos)
+
+  consola.success('Done!')
+}
+catch (error) {
+  consola.error(error)
+  process.exit(1)
+}
+
+async function searchTop100Repos(): Promise<string[]> { // resolves to the `full_name` of each search hit, sorted by stars descending
+  consola.start('Fetching top 100 starred repositories from GitHub API…')
+
+  const response = await ofetch<{ items: { full_name: string }[] }>(
+    'https://api.github.com/search/repositories',
+    {
+      query: {
+        q: 'stars:>1',
+        sort: 'stars',
+        order: 'desc',
+        per_page: 100,
+      },
+      headers: {
+        'Accept': 'application/vnd.github+json',
+        'X-GitHub-Api-Version': '2022-11-28',
+      },
+    },
+  )
+
+  return response.items.map(item => item.full_name)
+}
+
+async function fetchRepoDetails(repoList: string[]): Promise<Record<string, any>[]> { // one ungh.cc request per repo, in list order
+  consola.start(`Fetching ${repoList.length} GitHub repositories…`)
+
+  const repos: Record<string, any>[] = []
+
+  for (let i = 0; i < repoList.length; i++) {
+    const repoPath = repoList[i]!
+    console.log(`[${i + 1}/${repoList.length}] Fetching ${repoPath}…`)
+    const { repo } = await ofetch(`https://ungh.cc/repos/${repoPath}`)
+    repos.push(repo)
+  }
+
+  consola.success(`Successfully fetched ${repos.length}/${repoList.length} repositories`)
+
+  return repos
+}
+
+async function saveRepos(repos: Record<string, any>[]): Promise<void> { // writes `data/github-repos.json` under BENCHMARKS_DIR
+  const outputDir = path.join(BENCHMARKS_DIR, 'data')
+  const outputFile = path.join(outputDir, 'github-repos.json')
+
+  await fsp.mkdir(outputDir, { recursive: true })
+  await fsp.writeFile(outputFile, JSON.stringify(repos, undefined, 2))
+
+  const relativePath = path.relative(BENCHMARKS_DIR, outputFile)
+  consola.info(`Saved to \`${relativePath}\``)
+}
diff --git a/benchmarks/scripts/token-efficiency-benchmark.ts b/benchmarks/scripts/token-efficiency-benchmark.ts
new file mode 100644
index 0000000..5957115
--- /dev/null
+++ b/benchmarks/scripts/token-efficiency-benchmark.ts
@@ -0,0 +1,228 @@
+import * as fsp from 'node:fs/promises'
+import * as path from 'node:path'
+import { faker } from '@faker-js/faker'
+import { consola } from 'consola'
+import { encode as encodeTokens } from 'gpt-tokenizer' // o200k_base encoding (default)
+import { encode } from '../../src/index'
+import githubRepos from '../data/github-repos.json' with { type: 'json' }
+import { BENCHMARKS_DIR, ROOT_DIR } from '../src/constants'
+
+interface BenchmarkResult {
+ name: string // display name of the example
+ emoji: string // icon printed before the name
+ description: string // rendered as "Configuration" in the detailed section
+ data: any // payload returned by the example's getData()
+ jsonTokens: number // token count of the 2-space pretty-printed JSON encoding
+ toonTokens: number // token count of the TOON encoding
+ savings: number // jsonTokens - toonTokens
+ savingsPercent: string // savings relative to jsonTokens, formatted with one decimal
+ showDetailed: boolean // whether a JSON/TOON side-by-side example is rendered
+}
+
+const outputFilePath = path.join(BENCHMARKS_DIR, 'results', 'token-efficiency.md')
+
+const BENCHMARK_EXAMPLES = [ // each entry: display metadata plus a `getData` factory for the payload to encode
+  {
+    name: 'GitHub Repositories',
+    emoji: '⭐',
+    description: 'Top 100 GitHub repositories with stars, forks, and metadata',
+    getData: () => ({ repositories: githubRepos }),
+    showDetailed: true,
+  },
+  {
+    name: 'Analytics Time Series',
+    emoji: '📈',
+    description: '180 days of web metrics (views, clicks, conversions, revenue)',
+    getData: () => generateAnalytics(180),
+    showDetailed: true,
+  },
+  {
+    name: 'API Response',
+    emoji: '👥',
+    description: '50 user records with metadata and timestamps',
+    getData: () => generateUsers(50),
+    showDetailed: false,
+  },
+  {
+    name: 'E-commerce Order',
+    emoji: '🛒',
+    description: 'Nested order with customer and items',
+    getData: generateOrder,
+    showDetailed: false,
+  },
+] as const
+
+// Running token totals across all examples, used for the overall savings figure
+let totalJsonTokens = 0
+let totalToonTokens = 0
+
+const results: BenchmarkResult[] = []
+
+for (const example of BENCHMARK_EXAMPLES) {
+ const data = await example.getData()
+
+ const jsonString = JSON.stringify(data, undefined, 2) // JSON baseline is pretty-printed with 2-space indentation
+ const toonString = encode(data)
+
+ const jsonTokens = encodeTokens(jsonString).length
+ const toonTokens = encodeTokens(toonString).length
+ const savings = jsonTokens - toonTokens
+ const savingsPercent = ((savings / jsonTokens) * 100).toFixed(1)
+
+ totalJsonTokens += jsonTokens
+ totalToonTokens += toonTokens
+
+ results.push({
+ name: example.name,
+ emoji: example.emoji,
+ description: example.description,
+ data,
+ jsonTokens,
+ toonTokens,
+ savings,
+ savingsPercent,
+ showDetailed: example.showDetailed,
+ })
+}
+
+const totalSavings = totalJsonTokens - totalToonTokens
+const totalSavingsPercent = ((totalSavings / totalJsonTokens) * 100).toFixed(1)
+
+// Generate the text bar chart: one line per example with TOON tokens, JSON tokens and percent saved
+const barChartSection = results
+  .map((result) => {
+    const percentage = Number.parseFloat(result.savingsPercent)
+    const bar = generateBarChart(100 - percentage) // Invert to show TOON tokens
+    const jsonStr = result.jsonTokens.toLocaleString('en-US')
+    const toonStr = result.toonTokens.toLocaleString('en-US')
+    return `${result.emoji} ${result.name.padEnd(25)} ${bar} ${toonStr.padStart(6)} tokens (JSON: ${jsonStr.padStart(6)}) 💰 ${result.savingsPercent}% saved`
+  })
+  .join('\n')
+
+// Generate detailed examples (only for selected examples)
+const detailedExamples = results
+  .filter(result => result.showDetailed)
+  .map((result, i, filtered) => {
+    // Truncate large datasets for display; a missing description is kept as-is instead of becoming "undefined"
+    let displayData = result.data
+    if (result.name === 'GitHub Repositories') {
+      displayData = {
+        repositories: result.data.repositories.slice(0, 3).map((repo: any) => ({
+          ...repo,
+          description: repo.description == null ? repo.description : repo.description.slice(0, 80) + (repo.description.length > 80 ? '...' : ''),
+        })),
+      }
+    }
+    else if (result.name === 'Analytics Time Series') {
+      displayData = { metrics: result.data.metrics.slice(0, 5) }
+    }
+
+    const separator = i < filtered.length - 1 ? '\n\n---' : ''
+
+    return `#### ${result.emoji} ${result.name}
+
+**Configuration:** ${result.description}
+
+**Savings:** ${result.savings.toLocaleString('en-US')} tokens (${result.savingsPercent}% reduction)
+
+**JSON** (${result.jsonTokens.toLocaleString('en-US')} tokens):
+
+\`\`\`json
+${JSON.stringify(displayData, undefined, 2)}
+\`\`\`
+
+**TOON** (${result.toonTokens.toLocaleString('en-US')} tokens):
+
+\`\`\`
+${encode(displayData)}
+\`\`\`${separator}`
+  })
+  .join('\n\n')
+
+const markdown = `### Token Efficiency
+
+\`\`\`
+${barChartSection}
+\`\`\`
+
+**Total:** ${totalToonTokens.toLocaleString('en-US')} tokens (TOON) vs ${totalJsonTokens.toLocaleString('en-US')} tokens (JSON) → ${totalSavingsPercent}% savings
+
+<details>
+<summary><strong>View detailed examples</strong></summary>
+
+${detailedExamples}
+
+</details>
+`.trimStart()
+
+console.log(markdown) // echo the report to stdout as well as writing it to disk
+
+await fsp.mkdir(path.join(BENCHMARKS_DIR, 'results'), { recursive: true })
+await fsp.writeFile(outputFilePath, markdown, 'utf-8')
+
+consola.success(`Benchmark written to \`${path.relative(ROOT_DIR, outputFilePath)}\``)
+
+// Render `percentage` (clamped to 0-100) as a bar of `maxWidth` cells: filled blocks first, shaded cells after
+function generateBarChart(percentage: number, maxWidth: number = 25): string {
+  const filled = Math.min(maxWidth, Math.max(0, Math.round((percentage / 100) * maxWidth))) // clamp: repeat() throws on negative counts
+  const empty = maxWidth - filled
+  return '█'.repeat(filled) + '░'.repeat(empty)
+}
+
+// Generate analytics time series data: one record per calendar day, starting at 2025-01-01
+function generateAnalytics(days: number) {
+  return {
+    metrics: Array.from({ length: days }, (_, i) => {
+      const date = new Date(Date.UTC(2025, 0, 1)) // UTC, so toISOString() cannot shift the day in zones east of UTC
+      date.setUTCDate(date.getUTCDate() + i)
+      return {
+        date: date.toISOString().split('T')[0],
+        views: Math.floor(Math.random() * 5000) + 1000,
+        clicks: Math.floor(Math.random() * 500) + 50,
+        conversions: Math.floor(Math.random() * 100) + 10,
+        revenue: Number((Math.random() * 1000 + 100).toFixed(2)),
+      }
+    }),
+  }
+}
+
+// Generate a list-style user API response: `count` faker-generated users plus `total` and `page` fields
+function generateUsers(count: number) {
+ return {
+ users: Array.from({ length: count }, (_, i) => ({
+ id: i + 1,
+ name: faker.person.fullName(),
+ email: faker.internet.email(),
+ role: faker.helpers.arrayElement(['admin', 'user', 'moderator']),
+ active: faker.datatype.boolean(),
+ createdAt: faker.date.past({ years: 2 }).toISOString(),
+ lastLogin: faker.date.recent({ days: 30 }).toISOString(),
+ })),
+ total: count,
+ page: 1,
+ }
+}
+
+// Generate one nested e-commerce order: a customer object, 2-5 line items, and independently generated amounts
+function generateOrder() {
+ return {
+ orderId: faker.string.alphanumeric({ length: 12, casing: 'upper' }),
+ customer: {
+ id: faker.number.int({ min: 1000, max: 9999 }),
+ name: faker.person.fullName(),
+ email: faker.internet.email(),
+ phone: faker.phone.number(),
+ },
+ items: Array.from({ length: faker.number.int({ min: 2, max: 5 }) }, () => ({
+ sku: faker.string.alphanumeric({ length: 8, casing: 'upper' }),
+ name: faker.commerce.productName(),
+ quantity: faker.number.int({ min: 1, max: 5 }),
+ price: Number(faker.commerce.price({ min: 10, max: 200 })),
+ })),
+ subtotal: Number(faker.commerce.price({ min: 100, max: 500 })),
+ tax: Number(faker.commerce.price({ min: 10, max: 50 })),
+ total: Number(faker.commerce.price({ min: 110, max: 550 })),
+ status: faker.helpers.arrayElement(['pending', 'processing', 'shipped', 'delivered']),
+ createdAt: faker.date.recent({ days: 7 }).toISOString(),
+ }
+}
diff --git a/benchmarks/src/constants.ts b/benchmarks/src/constants.ts
new file mode 100644
index 0000000..e146db0
--- /dev/null
+++ b/benchmarks/src/constants.ts
@@ -0,0 +1,39 @@
+import process from 'node:process'
+import * as url from 'node:url'
+
+export const ROOT_DIR: string = url.fileURLToPath(new URL('../../', import.meta.url)) // filesystem path two directory levels above this module
+export const BENCHMARKS_DIR: string = url.fileURLToPath(new URL('../', import.meta.url)) // filesystem path one directory level above this module
+
+/**
+ * Benchmark execution configuration
+ */
+
+/**
+ * Enable dry run mode for quick testing with limited AI requests
+ *
+ * @remarks
+ * Set via environment variable: `DRY_RUN=true` (only the exact string `true` enables it)
+ */
+export const DRY_RUN: boolean = process.env.DRY_RUN === 'true'
+
+/**
+ * Limits applied when DRY_RUN is enabled
+ */
+export const DRY_RUN_LIMITS = {
+  /** Maximum number of questions to evaluate */
+  maxQuestions: 10,
+  /** Maximum number of formats to test (unset by default; NOTE(review): presumably unset means no cap - confirm) */
+  maxFormats: undefined as number | undefined,
+  /** Models to use in dry run (empty by default; NOTE(review): presumably empty means no restriction - confirm) */
+  allowedModels: [] as string[],
+}
+
+/**
+ * Default concurrency for parallel evaluations
+ */
+export const DEFAULT_CONCURRENCY = 20
+
+/**
+ * Delay between API requests to avoid rate limiting (in milliseconds)
+ */
+export const RATE_LIMIT_DELAY_MS = 100
diff --git a/benchmarks/src/datasets.ts b/benchmarks/src/datasets.ts
new file mode 100644
index 0000000..87643f2
--- /dev/null
+++ b/benchmarks/src/datasets.ts
@@ -0,0 +1,146 @@
+/**
+ * Datasets for TOON benchmarks
+ *
+ * These datasets are designed to test TOON's strengths and weaknesses:
+ * - Tabular: Uniform records (TOON optimal)
+ * - Nested: Complex structures with nested objects
+ * - Analytics: Time-series data
+ */
+
+import type { Dataset } from './types'
+import { faker } from '@faker-js/faker'
+import githubRepos from '../data/github-repos.json' with { type: 'json' }
+
+// Seed for reproducibility
+faker.seed(12345)
+
+const departments = ['Engineering', 'Sales', 'Marketing', 'HR', 'Operations', 'Finance']
+
+/**
+ * Tabular dataset: 100 employee records that all share the same flat set of fields
+ *
+ * @remarks
+ * Tests TOON's tabular array format
+ */
+const tabularDataset: Dataset = {
+  name: 'tabular',
+  description: 'Uniform employee records (TOON optimal format)',
+  data: {
+    employees: Array.from({ length: 100 }, (_, index) => {
+      // Drawn before the other fields; reordering faker calls would change the seeded output
+      const yearsExperience = faker.number.int({ min: 1, max: 20 })
+      const id = index + 1
+      const name = faker.person.fullName()
+      const email = faker.internet.email().toLowerCase()
+      const department = departments[index % departments.length]! // cycles through the list in order
+      const salary = faker.number.int({ min: 45000, max: 150000 })
+      const active = faker.datatype.boolean(0.8) // 80% active
+      return { id, name, email, department, salary, yearsExperience, active }
+    }),
+  },
+}
+
+/**
+ * Nested dataset: 50 e-commerce orders, each with a nested customer object and 1-4 line items
+ *
+ * @remarks
+ * Tests TOON's handling of complex nested objects
+ */
+const productNames = ['Wireless Mouse', 'USB Cable', 'Laptop Stand', 'Keyboard', 'Webcam', 'Headphones', 'Monitor', 'Desk Lamp']
+const statuses = ['pending', 'processing', 'shipped', 'delivered', 'cancelled']
+
+const nestedDataset: Dataset = {
+  name: 'nested',
+  description: 'E-commerce orders with nested structures',
+  data: {
+    orders: Array.from({ length: 50 }, (_, i) => {
+      const customerId = (i % 20) + 1
+      const itemCount = faker.number.int({ min: 1, max: 4 })
+
+      const items = Array.from({ length: itemCount }, (_, j) => {
+        const price = faker.number.float({ min: 9.99, max: 199.99, fractionDigits: 2 })
+        const quantity = faker.number.int({ min: 1, max: 5 })
+        return {
+          sku: `SKU-${faker.string.alphanumeric({ length: 6 }).toUpperCase()}`,
+          name: productNames[j % productNames.length]!,
+          quantity,
+          price,
+        }
+      })
+      // Order total is derived from the line items and rounded to two decimals
+      const total = Number(items.reduce((sum, item) => sum + (item.price * item.quantity), 0).toFixed(2))
+      // Fixed refDate: without it `date.recent` counts back from "now", so the seeded dataset would change from day to day
+      return {
+        orderId: `ORD-${String(i + 1).padStart(4, '0')}`,
+        customer: {
+          id: customerId,
+          name: faker.person.fullName(),
+          email: faker.internet.email().toLowerCase(),
+        },
+        items,
+        total,
+        status: statuses[i % statuses.length]!,
+        orderDate: faker.date.recent({ days: 90, refDate: '2025-01-01T00:00:00.000Z' }).toISOString().split('T')[0]!,
+      }
+    }),
+  },
+}
+
+/**
+ * Analytics dataset: 60 consecutive days of time-series metrics starting at 2025-01-01 (UTC)
+ *
+ * @remarks
+ * Tests TOON's handling of numeric data and date fields
+ */
+const analyticsDataset: Dataset = {
+  name: 'analytics',
+  description: 'Time-series analytics data',
+  data: {
+    metrics: Array.from({ length: 60 }, (_, i) => {
+      const date = new Date('2025-01-01') // date-only ISO strings are parsed as UTC midnight
+      date.setUTCDate(date.getUTCDate() + i) // stay in UTC so the result matches toISOString() in every timezone
+      const dayOfWeek = date.getUTCDay() // weekday of the same UTC calendar day that is emitted below
+      // Simulate realistic web traffic with some variation
+      const baseViews = 5000
+      const weekendMultiplier = dayOfWeek === 0 || dayOfWeek === 6 ? 0.7 : 1.0
+      const views = Math.round(baseViews * weekendMultiplier + faker.number.int({ min: -1000, max: 3000 }))
+      const clicks = Math.round(views * faker.number.float({ min: 0.02, max: 0.08 }))
+      const conversions = Math.round(clicks * faker.number.float({ min: 0.05, max: 0.15 }))
+      const avgOrderValue = faker.number.float({ min: 49.99, max: 299.99 })
+      const revenue = Number((conversions * avgOrderValue).toFixed(2))
+
+      return {
+        date: date.toISOString().split('T')[0]!,
+        views,
+        clicks,
+        conversions,
+        revenue,
+        bounceRate: faker.number.float({ min: 0.3, max: 0.7, fractionDigits: 2 }),
+      }
+    }),
+  },
+}
+
+/**
+ * GitHub dataset: at most the first 200 entries of the bundled `github-repos.json`
+ *
+ * @remarks
+ * Tests TOON's tabular format with real-world data
+ */
+const githubDataset: Dataset = {
+  name: 'github',
+  description: 'Popular GitHub repositories',
+  data: {
+    repositories: githubRepos.slice(0, 200),
+  },
+}
+
+/**
+ * All datasets used in the benchmark (the report iterates this array, so its order is the report's section order)
+ */
+export const datasets: Dataset[] = [
+  tabularDataset,
+  nestedDataset,
+  analyticsDataset,
+  githubDataset,
+]
diff --git a/benchmarks/src/evaluate.ts b/benchmarks/src/evaluate.ts
new file mode 100644
index 0000000..ec1c3ec
--- /dev/null
+++ b/benchmarks/src/evaluate.ts
@@ -0,0 +1,133 @@
+/**
+ * LLM evaluation logic for TOON benchmarks
+ *
+ * Handles:
+ * - Model configuration
+ * - Question evaluation with LLMs
+ * - Answer validation using LLM-as-judge
+ */
+
+import type { LanguageModelV2 } from '@ai-sdk/provider'
+import type { EvaluationResult, Question } from './types'
+import { setTimeout } from 'node:timers/promises'
+import { anthropic } from '@ai-sdk/anthropic'
+import { openai } from '@ai-sdk/openai'
+import { generateText } from 'ai'
+import { consola } from 'consola'
+import { RATE_LIMIT_DELAY_MS } from './constants'
+
+/**
+ * Models used for evaluation, keyed by the model name that the report matches against `EvaluationResult.model`
+ */
+export const models: Record<string, LanguageModelV2> = {
+  'gpt-4o-mini': openai('gpt-4o-mini'),
+  'claude-haiku-4-5': anthropic('claude-haiku-4-5-20251001'),
+}
+
+/**
+ * Judge whether `actual` answers `question` correctly given `expected`, using gpt-4o-mini as an LLM judge.
+ * Falls back to a trimmed, case-insensitive string comparison when the judge request throws.
+ */
+export async function validateAnswer(
+  actual: string,
+  expected: string,
+  question: string,
+): Promise<boolean> {
+  const prompt = `You are validating answers to questions about structured data.
+
+Question: ${question}
+Expected answer: ${expected}
+Actual answer: ${actual}
+
+Is the actual answer correct? Consider:
+- Exact matches are correct
+- Semantically equivalent answers are correct (e.g., "50000" vs "$50,000" vs "50000 dollars")
+- Minor formatting differences are acceptable
+- Case-insensitive comparison for text
+
+Respond with only "YES" or "NO".`
+
+  try {
+    const { text } = await generateText({
+      model: models['gpt-4o-mini']!,
+      prompt,
+      temperature: 0,
+      maxOutputTokens: 16,
+    })
+
+    await setTimeout(RATE_LIMIT_DELAY_MS)
+    // Accept a verdict that starts with YES even if the judge adds punctuation or trailing words ("YES.", "Yes, ...")
+    return /^\W*YES\b/i.test(text)
+  }
+  catch (error) {
+    consola.error('Validation error:', error)
+    // Fallback to simple string comparison
+    return actual.toLowerCase().trim() === expected.toLowerCase().trim()
+  }
+}
+
+/**
+ * Ask `model` one question about `formattedData` and grade the answer; a failed request yields an incorrect, zero-token result
+ */
+export async function evaluateQuestion(
+  question: Question,
+  formatName: string,
+  formattedData: string,
+  model: any,
+  modelName: string,
+): Promise<EvaluationResult> {
+  const prompt = `Given the following data in ${formatName} format:
+
+\`\`\`
+${formattedData}
+\`\`\`
+
+Question: ${question.prompt}
+
+Provide only the direct answer, without any additional explanation or formatting.`
+
+  const startTime = Date.now()
+
+  try {
+    const { text, usage } = await generateText({
+      model,
+      prompt,
+      temperature: 0,
+      maxOutputTokens: 50,
+    })
+    // Take the latency reading before the rate-limit pause so the sleep is not reported as model time
+    const latencyMs = Date.now() - startTime
+
+    await setTimeout(RATE_LIMIT_DELAY_MS)
+    const correct = await validateAnswer(text.trim(), question.groundTruth, question.prompt)
+
+    return {
+      questionId: question.id,
+      format: formatName,
+      model: modelName,
+      expected: question.groundTruth,
+      actual: text.trim(),
+      correct,
+      inputTokens: usage.inputTokens ?? 0,
+      outputTokens: usage.outputTokens ?? 0,
+      latencyMs,
+    }
+  }
+  catch (error) {
+    const latencyMs = Date.now() - startTime // read before the pause below, same as the success path
+    consola.error(`Error evaluating ${question.id} with ${formatName}/${modelName}:`, error)
+    await setTimeout(RATE_LIMIT_DELAY_MS)
+
+    return {
+      questionId: question.id,
+      format: formatName,
+      model: modelName,
+      expected: question.groundTruth,
+      actual: '',
+      correct: false,
+      inputTokens: 0,
+      outputTokens: 0,
+      latencyMs,
+    }
+  }
+}
diff --git a/benchmarks/src/formatters.ts b/benchmarks/src/formatters.ts
new file mode 100644
index 0000000..e1081e3
--- /dev/null
+++ b/benchmarks/src/formatters.ts
@@ -0,0 +1,90 @@
+/**
+ * Format converters for TOON benchmarks
+ *
+ * Converts data to different formats:
+ * - JSON
+ * - TOON
+ * - CSV
+ * - Markdown key-value
+ * - YAML
+ */
+
+import { stringify as stringifyCSV } from 'csv-stringify/sync'
+import { stringify as stringifyYAML } from 'yaml'
+import { encode as encodeToon } from '../../src/index'
+
+export const formatters = {
+  'json': (data: unknown): string => JSON.stringify(data, undefined, 2), // pretty-printed with 2-space indentation
+  'toon': (data: unknown): string => encodeToon(data), // `encode` from this repository's ../../src/index
+  'csv': (data: unknown): string => toCSV(data), // see toCSV in this file
+  'markdown-kv': (data: unknown): string => toMarkdownKV(data), // see toMarkdownKV in this file
+  'yaml': (data: unknown): string => stringifyYAML(data), // `stringify` from the yaml package
+}
+
+/**
+ * Render data as CSV text. A non-empty root-level array becomes a single table; for a plain object,
+ * each non-empty array property becomes a `# key` heading followed by its table.
+ * Non-array properties are omitted, and any other input yields an empty string.
+ */
+function toCSV(data: unknown): string {
+  if (Array.isArray(data)) {
+    // Root-level array
+    return data.length > 0 ? stringifyCSV(data, { header: true }).trim() : ''
+  }
+  if (typeof data !== 'object' || data === null) {
+    return ''
+  }
+
+  // Top-level object: emit one titled section per non-empty array property
+  const parts = Object.entries(data).flatMap(([key, value]) => {
+    const hasRows = Array.isArray(value) && value.length > 0
+    return hasRows ? [`# ${key}`, stringifyCSV(value, { header: true })] : []
+  })
+  return parts.join('\n').trim()
+}
+
+/**
+ * Render data as Markdown-style `**key**: value` lines, prefixing each line with `indent` copies of the pad string.
+ *
+ * - Object entries whose value is an object or array get a `**key**:` heading line followed by the nested rendering.
+ * - Object items inside an array are rendered entry by entry; each one that is not the last element is followed by a blank line.
+ * - Any other array item is rendered as a `- item` bullet; a bare non-object value is rendered as-is.
+ */
+function toMarkdownKV(data: unknown, indent = 0): string {
+  const pad = ' '.repeat(indent)
+  const out: string[] = []
+
+  // Shared by the plain-object branch and by object items inside arrays
+  const renderEntries = (record: object): void => {
+    for (const [key, value] of Object.entries(record)) {
+      const isNested = typeof value === 'object' && value !== null
+      if (isNested)
+        out.push(`${pad}**${key}**:`, toMarkdownKV(value, indent + 1))
+      else
+        out.push(`${pad}**${key}**: ${value}`)
+    }
+  }
+
+  if (Array.isArray(data)) {
+    const lastIndex = data.length - 1
+    data.forEach((item, position) => {
+      const isRecord = typeof item === 'object' && item !== null && !Array.isArray(item)
+      if (!isRecord) {
+        // Primitive, null, or nested-array item: bullet line
+        out.push(`${pad}- ${item}`)
+        return
+      }
+      renderEntries(item)
+      if (position < lastIndex)
+        out.push('')
+    })
+  }
+  else if (typeof data === 'object' && data !== null) {
+    renderEntries(data)
+  }
+  else {
+    out.push(`${pad}${data}`)
+  }
+
+  return out.join('\n')
+}
diff --git a/benchmarks/src/questions.ts b/benchmarks/src/questions.ts
new file mode 100644
index 0000000..e211dce
--- /dev/null
+++ b/benchmarks/src/questions.ts
@@ -0,0 +1,398 @@
+/* eslint-disable no-console */
+
+/**
+ * Question generation for TOON benchmarks
+ *
+ * Generates roughly 160 questions across different types:
+ * - Field retrieval (~70%): "What is X's Y?"
+ * - Aggregation (~15%): "How many X have Y?"
+ * - Filtering (~15%): "List/count X where Y"
+ *
+ * Questions are generated dynamically based on actual data values
+ */
+
+import type { Question } from './types'
+import { datasets } from './datasets'
+
+/**
+ * Generate all questions from datasets, with ids `q1`, `q2`, ... assigned in generation order; logs a per-dataset breakdown
+ */
+export function generateQuestions(): Question[] {
+  const questions: Question[] = []
+  let idCounter = 1
+
+  // Get datasets
+  const tabular = datasets.find(d => d.name === 'tabular')?.data.employees as any[] || []
+  const nested = datasets.find(d => d.name === 'nested')?.data.orders as any[] || []
+  const analytics = datasets.find(d => d.name === 'analytics')?.data.metrics as any[] || []
+  const github = datasets.find(d => d.name === 'github')?.data.repositories as any[] || []
+
+  // ========================================
+  // TABULAR DATASET QUESTIONS (up to 58 questions)
+  // ========================================
+
+  if (tabular.length > 0) {
+    // Field retrieval: specific employees (up to 40 questions)
+    for (let i = 0; i < Math.min(40, tabular.length); i++) {
+      const emp = tabular[i * 2] || tabular[i]
+      if (!emp)
+        continue
+
+      // Alternate between different field types
+      if (i % 3 === 0) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What is the salary of ${emp.name}?`,
+          groundTruth: String(emp.salary),
+          type: 'field-retrieval',
+          dataset: 'tabular',
+        })
+      }
+      else if (i % 3 === 1) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What department does ${emp.name} work in?`,
+          groundTruth: emp.department,
+          type: 'field-retrieval',
+          dataset: 'tabular',
+        })
+      }
+      else {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What is the email address of ${emp.name}?`,
+          groundTruth: emp.email,
+          type: 'field-retrieval',
+          dataset: 'tabular',
+        })
+      }
+    }
+
+    // Aggregation: count by department (up to 6 questions)
+    const departments = [...new Set(tabular.map((e: any) => e.department))]
+    for (const dept of departments.slice(0, 6)) {
+      const count = tabular.filter((e: any) => e.department === dept).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many employees work in ${dept}?`,
+        groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'tabular',
+      })
+    }
+
+    // Aggregation: salary ranges (4 questions)
+    const salaryThresholds = [60000, 80000, 100000, 120000]
+    for (const threshold of salaryThresholds) {
+      const count = tabular.filter((e: any) => e.salary > threshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many employees have a salary greater than ${threshold}?`,
+        groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'tabular',
+      })
+    }
+
+    // Filtering: active status (2 questions)
+    const activeCount = tabular.filter((e: any) => e.active).length
+    const inactiveCount = tabular.filter((e: any) => !e.active).length
+    questions.push(
+      {
+        id: `q${idCounter++}`,
+        prompt: 'How many employees are active?',
+        groundTruth: String(activeCount),
+        type: 'filtering',
+        dataset: 'tabular',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'How many employees are inactive?',
+        groundTruth: String(inactiveCount),
+        type: 'filtering',
+        dataset: 'tabular',
+      },
+    )
+
+    // Complex filtering: multi-condition (up to 6 questions: 4 by department + 2 by experience)
+    for (const dept of departments.slice(0, 4)) {
+      const count = tabular.filter((e: any) => e.department === dept && e.salary > 80000).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many employees in ${dept} have a salary greater than 80000?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'tabular',
+      })
+    }
+
+    for (const exp of [5, 10]) {
+      const count = tabular.filter((e: any) => e.yearsExperience > exp && e.active).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many active employees have more than ${exp} years of experience?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'tabular',
+      })
+    }
+  }
+
+  // ========================================
+  // NESTED DATASET QUESTIONS (up to 39 questions + one per distinct order status)
+  // ========================================
+
+  if (nested.length > 0) {
+    // Field retrieval: order totals and statuses (up to 20 questions)
+    for (let i = 0; i < Math.min(20, nested.length); i++) {
+      const order = nested[i * 2] || nested[i]
+      if (!order)
+        continue
+
+      if (i % 2 === 0) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What is the total amount for order ${order.orderId}?`,
+          groundTruth: String(order.total),
+          type: 'field-retrieval',
+          dataset: 'nested',
+        })
+      }
+      else {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What is the status of order ${order.orderId}?`,
+          groundTruth: order.status,
+          type: 'field-retrieval',
+          dataset: 'nested',
+        })
+      }
+    }
+
+    // Field retrieval: customer info (up to 15 questions)
+    for (let i = 0; i < Math.min(15, nested.length); i++) {
+      const order = nested[i * 3] || nested[i]
+      if (!order)
+        continue
+
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `What is the customer name for order ${order.orderId}?`,
+        groundTruth: order.customer.name,
+        type: 'field-retrieval',
+        dataset: 'nested',
+      })
+    }
+
+    // Aggregation: count by status (one question per distinct status)
+    const statuses = [...new Set(nested.map((o: any) => o.status))]
+    for (const status of statuses) {
+      const count = nested.filter((o: any) => o.status === status).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many orders have status "${status}"?`,
+        groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'nested',
+      })
+    }
+
+    // Aggregation: total revenue (1 question)
+    const totalRevenue = nested.reduce((sum: number, o: any) => sum + o.total, 0)
+    questions.push({
+      id: `q${idCounter++}`,
+      prompt: 'What is the total revenue across all orders?',
+      groundTruth: String(totalRevenue.toFixed(2)),
+      type: 'aggregation',
+      dataset: 'nested',
+    })
+
+    // Filtering: high-value orders (3 questions)
+    const highValueThresholds = [200, 400, 600]
+    for (const threshold of highValueThresholds) {
+      const count = nested.filter((o: any) => o.total > threshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many orders have a total greater than ${threshold}?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'nested',
+      })
+    }
+  }
+
+  // ========================================
+  // ANALYTICS DATASET QUESTIONS (up to 29 questions)
+  // ========================================
+
+  if (analytics.length > 0) {
+    // Field retrieval: specific dates (up to 20 questions)
+    for (let i = 0; i < Math.min(20, analytics.length); i++) {
+      const metric = analytics[i * 3] || analytics[i]
+      if (!metric)
+        continue
+
+      if (i % 2 === 0) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `How many views were recorded on ${metric.date}?`,
+          groundTruth: String(metric.views),
+          type: 'field-retrieval',
+          dataset: 'analytics',
+        })
+      }
+      else {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What was the revenue on ${metric.date}?`,
+          groundTruth: String(metric.revenue),
+          type: 'field-retrieval',
+          dataset: 'analytics',
+        })
+      }
+    }
+
+    // Aggregation: totals (3 questions)
+    const totalViews = analytics.reduce((sum: number, m: any) => sum + m.views, 0)
+    const totalRevenue = analytics.reduce((sum: number, m: any) => sum + m.revenue, 0)
+    const totalConversions = analytics.reduce((sum: number, m: any) => sum + m.conversions, 0)
+
+    questions.push(
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the total number of views across all dates?',
+        groundTruth: String(totalViews),
+        type: 'aggregation',
+        dataset: 'analytics',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the total revenue across all dates?',
+        groundTruth: String(totalRevenue.toFixed(2)),
+        type: 'aggregation',
+        dataset: 'analytics',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the total number of conversions across all dates?',
+        groundTruth: String(totalConversions),
+        type: 'aggregation',
+        dataset: 'analytics',
+      },
+    )
+
+    // Filtering: high-performing days (6 questions: 3 view thresholds + 3 conversion thresholds)
+    const viewThresholds = [5000, 6000, 7000]
+    for (const threshold of viewThresholds) {
+      const count = analytics.filter((m: any) => m.views > threshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many days had more than ${threshold} views?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'analytics',
+      })
+    }
+
+    const conversionThresholds = [10, 20, 30]
+    for (const threshold of conversionThresholds) {
+      const count = analytics.filter((m: any) => m.conversions > threshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many days had more than ${threshold} conversions?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'analytics',
+      })
+    }
+  }
+
+  // ========================================
+  // GITHUB DATASET QUESTIONS (up to 32 questions)
+  // ========================================
+
+  if (github.length > 0) {
+    // Field retrieval: specific repos (up to 20 questions)
+    for (let i = 0; i < Math.min(20, github.length); i++) {
+      const repo = github[i * 10] || github[i]
+      if (!repo)
+        continue
+
+      if (i % 2 === 0) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `How many stars does ${repo.owner}/${repo.name} have?`,
+          groundTruth: String(repo.stars),
+          type: 'field-retrieval',
+          dataset: 'github',
+        })
+      }
+      else {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `How many forks does ${repo.owner}/${repo.name} have?`,
+          groundTruth: String(repo.forks),
+          type: 'field-retrieval',
+          dataset: 'github',
+        })
+      }
+    }
+
+    // Aggregation: count by owner (up to 5 questions)
+    const owners = [...new Set(github.map((r: any) => r.owner))]
+    for (const owner of owners.slice(0, 5)) {
+      const count = github.filter((r: any) => r.owner === owner).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many repositories does ${owner} have in the dataset?`,
+        groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'github',
+      })
+    }
+
+    // Aggregation: total stars (1 question)
+    const totalStars = github.reduce((sum: number, r: any) => sum + r.stars, 0)
+    questions.push({
+      id: `q${idCounter++}`,
+      prompt: 'What is the total number of stars across all repositories?',
+      groundTruth: String(totalStars),
+      type: 'aggregation',
+      dataset: 'github',
+    })
+
+    // Filtering: popular repos (6 questions: 3 star thresholds + 3 fork thresholds)
+    const starThresholds = [10000, 50000, 100000]
+    for (const threshold of starThresholds) {
+      const count = github.filter((r: any) => r.stars > threshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many repositories have more than ${threshold} stars?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'github',
+      })
+    }
+
+    const forkThresholds = [1000, 5000, 10000]
+    for (const threshold of forkThresholds) {
+      const count = github.filter((r: any) => r.forks > threshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many repositories have more than ${threshold} forks?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'github',
+      })
+    }
+  }
+
+  console.log(`๐ Question breakdown:`)
+  console.log(` Tabular: ${questions.filter(q => q.dataset === 'tabular').length}`)
+  console.log(` Nested: ${questions.filter(q => q.dataset === 'nested').length}`)
+  console.log(` Analytics: ${questions.filter(q => q.dataset === 'analytics').length}`)
+  console.log(` GitHub: ${questions.filter(q => q.dataset === 'github').length}`)
+  console.log(` Total: ${questions.length}`)
+
+  return questions
+}
diff --git a/benchmarks/src/report.ts b/benchmarks/src/report.ts
new file mode 100644
index 0000000..2638622
--- /dev/null
+++ b/benchmarks/src/report.ts
@@ -0,0 +1,288 @@
+/**
+ * Report generation for TOON benchmarks
+ *
+ * Handles:
+ * - Statistical analysis
+ * - Twitter-ready markdown report generation with visual elements
+ * - Per-dataset breakdowns
+ * - Cost analysis
+ * - Result file saving
+ */
+
+import type { EvaluationResult, FormatResult, Question } from './types'
+import * as fsp from 'node:fs/promises'
+import * as path from 'node:path'
+import { encode } from 'gpt-tokenizer'
+import { BENCHMARKS_DIR } from './constants'
+import { datasets } from './datasets'
+import { models } from './evaluate'
+
+/**
+ * Calculate per-format statistics from evaluation results, sorted by accuracy (highest first)
+ */
+export function calculateFormatResults(
+  results: EvaluationResult[],
+  tokenCounts: Record<string, number>,
+): FormatResult[] {
+  const formatNames = [...new Set(results.map(r => r.format))]
+
+  return formatNames.map((formatName) => {
+    const formatResults = results.filter(r => r.format === formatName)
+    const correctCount = formatResults.filter(r => r.correct).length
+    const totalCount = formatResults.length
+    const accuracy = correctCount / totalCount
+
+    // Average over the token counts actually recorded for this format (keys are `${format}-${dataset}`)
+    const formatTokens = Object.entries(tokenCounts)
+      .filter(([key]) => key.startsWith(`${formatName}-`))
+      .map(([, tokens]) => tokens)
+    const avgTokens = formatTokens.length > 0 ? formatTokens.reduce((sum, tokens) => sum + tokens, 0) / formatTokens.length : 0
+    const avgInputTokens = formatResults.reduce((sum, r) => sum + r.inputTokens, 0) / totalCount
+    const avgLatency = formatResults.reduce((sum, r) => sum + r.latencyMs, 0) / totalCount
+
+    return {
+      format: formatName,
+      accuracy,
+      totalTokens: Math.round(avgTokens),
+      avgInputTokens: Math.round(avgInputTokens),
+      avgLatency: Math.round(avgLatency),
+      correctCount,
+      totalCount,
+    }
+  }).sort((a, b) => b.accuracy - a.accuracy)
+}
+
+/**
+ * Generate embeddable markdown report: headline bars per model, a format table, and a collapsible per-dataset/per-model breakdown
+ */
+export function generateMarkdownReport(
+  formatResults: FormatResult[],
+  results: EvaluationResult[],
+  questions: Question[],
+  tokenCounts: Record<string, number>,
+): string {
+  const lines: string[] = [
+    '### Retrieval Accuracy',
+    '',
+  ]
+
+  const toon = formatResults.find(r => r.format === 'toon')
+  const json = formatResults.find(r => r.format === 'json')
+
+  // Model-by-model breakdown (most interesting result)
+  const modelCount = Object.keys(models).length
+  lines.push(`Tested across **${modelCount} ${modelCount === 1 ? 'LLM' : 'LLMs'}** with data retrieval tasks:`, '', '```')
+
+  for (const modelName of Object.keys(models)) {
+    const modelResults = formatResults.map((fr) => {
+      const modelFormatResults = results.filter(r => r.model === modelName && r.format === fr.format)
+      const correctCount = modelFormatResults.filter(r => r.correct).length
+      const totalCount = modelFormatResults.length
+      const accuracy = totalCount > 0 ? correctCount / totalCount : 0
+
+      return {
+        format: fr.format,
+        accuracy,
+        correctCount,
+        totalCount,
+      }
+    }).sort((a, b) => b.accuracy - a.accuracy)
+
+    const bestResult = modelResults[0]!
+    const bar = createTokenBar(bestResult.accuracy, 1, 20)
+
+    lines.push(`${modelName.padEnd(20)} ${bar} ${(bestResult.accuracy * 100).toFixed(1)}% accuracy`)
+  }
+
+  lines.push('```', '')
+
+  // Summary comparison
+  if (toon && json) {
+    const tokenSavings = ((1 - toon.totalTokens / json.totalTokens) * 100).toFixed(1)
+    lines.push(
+      `**TOON achieves ${(toon.accuracy * 100).toFixed(1)}% accuracy (vs JSON's ${(json.accuracy * 100).toFixed(1)}%) while using ${tokenSavings}% fewer tokens.**`,
+      '',
+    )
+  }
+
+  // Simple format comparison table
+  lines.push(
+    '| Format | Accuracy | Average Tokens |',
+    '| ------ | -------- | -------------- |',
+  )
+
+  for (const result of formatResults) {
+    lines.push(
+      `| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.totalTokens.toLocaleString()} |`,
+    )
+  }
+
+  lines.push('', '<details>', '<summary><strong>View detailed breakdown by dataset and model</strong></summary>', '', '#### Performance by Dataset', '')
+
+  const datasetByQuestionId = new Map(questions.map(q => [q.id, q.dataset] as const)) // question id -> dataset name, built once
+
+  for (const dataset of datasets) {
+    lines.push(`##### ${dataset.description}`, '')
+
+    // Results belonging to this dataset, selected once per dataset instead of once per format
+    const datasetAllResults = results.filter(r => r.questionId.includes(dataset.name) || datasetByQuestionId.get(r.questionId) === dataset.name)
+    const datasetResults = formatResults.map((fr) => {
+      const formatDatasetResults = datasetAllResults.filter(r => r.format === fr.format)
+      if (formatDatasetResults.length === 0)
+        return undefined
+
+      const correctCount = formatDatasetResults.filter(r => r.correct).length
+      const totalCount = formatDatasetResults.length
+      const accuracy = totalCount > 0 ? correctCount / totalCount : 0
+
+      // Get token count for this dataset+format; `??` falls back only when the entry is missing, not when it is 0
+      const tokenKey = `${fr.format}-${dataset.name}`
+      const tokens = tokenCounts[tokenKey] ?? fr.totalTokens
+
+      return {
+        format: fr.format,
+        accuracy,
+        tokens,
+        correctCount,
+        totalCount,
+      }
+    }).filter(Boolean) as { format: string, accuracy: number, tokens: number, correctCount: number, totalCount: number }[]
+
+    if (datasetResults.length === 0)
+      continue
+
+    // Sort by efficiency
+    datasetResults.sort((a, b) => {
+      const effA = (a.accuracy ** 2) / (a.tokens / 1000)
+      const effB = (b.accuracy ** 2) / (b.tokens / 1000)
+      return effB - effA
+    })
+
+    lines.push(
+      '| Format | Accuracy | Tokens | Correct/Total |',
+      '|--------|----------|--------|---------------|',
+    )
+
+    for (const result of datasetResults.slice(0, 6)) {
+      lines.push(
+        `| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.tokens.toLocaleString()} | ${result.correctCount}/${result.totalCount} |`,
+      )
+    }
+
+    lines.push('')
+  }
+
+  // Model breakdown
+  lines.push('', '#### Performance by Model', '')
+
+  for (const modelName of Object.keys(models)) {
+    lines.push(`##### ${modelName}`, '')
+
+    const modelResults = formatResults.map((fr) => {
+      const modelFormatResults = results.filter(r => r.model === modelName && r.format === fr.format)
+      const correctCount = modelFormatResults.filter(r => r.correct).length
+      const totalCount = modelFormatResults.length
+      const accuracy = totalCount > 0 ? correctCount / totalCount : 0 // guard: a model without results would otherwise print NaN%
+
+      return {
+        format: fr.format,
+        accuracy,
+        correctCount,
+        totalCount,
+      }
+    }).sort((a, b) => b.accuracy - a.accuracy)
+
+    lines.push('| Format | Accuracy | Correct/Total |', '|--------|----------|---------------|')
+
+    for (const result of modelResults) {
+      lines.push(`| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.correctCount}/${result.totalCount} |`)
+    }
+
+    lines.push('')
+  }
+
+  // Methodology
+  lines.push(
+    '',
+    '#### Methodology',
+    '',
+    '- **Semantic validation**: LLM-as-judge validates responses semantically (not exact string matching).',
+    '- **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding.',
+    '- **Question types**: Field retrieval, aggregation, and filtering tasks.',
+    '- **Real data**: Faker.js-generated datasets + GitHub repositories.',
+    '',
+    '</details>',
+    '',
+  )
+
+  return lines.join('\n')
+}
+
+/**
+ * Calculate token counts for all format+dataset combinations
+ * (one entry per pair, keyed `${formatName}-${datasetName}`; counted with gpt-tokenizer's `encode`)
+ */
+export function calculateTokenCounts(
+  formatters: Record<string, (data: unknown) => string>,
+): Record<string, number> {
+  const tokenCounts: Record<string, number> = {}
+
+  for (const [formatName, formatter] of Object.entries(formatters)) {
+    for (const dataset of datasets) {
+      // Token count of the dataset rendered in this format
+      tokenCounts[`${formatName}-${dataset.name}`] = encode(formatter(dataset.data)).length
+    }
+  }
+
+  return tokenCounts
+}
+
+/**
+ * Save raw results, a JSON summary, and the markdown report under `results/accuracy`; resolves to that directory's path
+ */
+export async function saveResults(
+  results: EvaluationResult[],
+  formatResults: FormatResult[],
+  questions: Question[],
+  tokenCounts: Record<string, number>,
+): Promise<string> {
+  const resultsDir = path.join(BENCHMARKS_DIR, 'results', 'accuracy')
+  await fsp.mkdir(resultsDir, { recursive: true })
+
+  // Save raw results
+  await fsp.writeFile(
+    path.join(resultsDir, 'raw-results.json'),
+    JSON.stringify(results, undefined, 2),
+  )
+
+  // Save summary
+  await fsp.writeFile(
+    path.join(resultsDir, 'summary.json'),
+    JSON.stringify({
+      formatResults,
+      questions: questions.length,
+      models: Object.keys(models),
+      datasets: datasets.map(d => ({ name: d.name, description: d.description })),
+      tokenCounts,
+      timestamp: new Date().toISOString(),
+    }, undefined, 2),
+  )
+
+  // Generate markdown report
+  const report = generateMarkdownReport(formatResults, results, questions, tokenCounts)
+  await fsp.writeFile(
+    path.join(resultsDir, 'report.md'),
+    report,
+  )
+
+  return resultsDir
+}
+
+/**
+ * Render a bar of `width` cells whose filled share is `tokens / maxTokens`, clamped to 0..1 (a non-positive max or non-finite value gives an empty bar)
+ */
+function createTokenBar(tokens: number, maxTokens: number, width = 30): string {
+  const ratio = maxTokens > 0 && Number.isFinite(tokens) ? Math.min(Math.max(tokens / maxTokens, 0), 1) : 0
+  const filled = Math.round(ratio * width) // clamping keeps both repeat counts non-negative
+  return '█'.repeat(filled) + '░'.repeat(width - filled)
+}
diff --git a/benchmarks/src/types.ts b/benchmarks/src/types.ts
new file mode 100644
index 0000000..bca48fa
--- /dev/null
+++ b/benchmarks/src/types.ts
@@ -0,0 +1,35 @@
+export interface Dataset { // a named, described data payload
+ name: string // identifier for the dataset
+ description: string // human-readable summary
+ data: any // payload; its shape is not constrained by this type
+}
+
+export interface Question { // a prompt together with its reference answer
+ id: string // NOTE(review): presumably what EvaluationResult.questionId refers to — confirm
+ prompt: string
+ groundTruth: string // NOTE(review): presumably the expected answer — confirm against the evaluator
+ type: 'field-retrieval' | 'aggregation' | 'filtering' | 'comparison' // closed set of question categories
+ dataset: string // NOTE(review): presumably a Dataset.name — confirm
+}
+
+export interface EvaluationResult { // NOTE(review): looks like one record per (question, format, model) run — confirm
+ questionId: string // NOTE(review): presumably a Question.id — confirm
+ format: string
+ model: string
+ expected: string
+ actual: string
+ correct: boolean // NOTE(review): presumably whether `actual` was judged to match `expected` — confirm
+ inputTokens: number
+ outputTokens: number
+ latencyMs: number // NOTE(review): unit presumably milliseconds, per the name — confirm
+}
+
+export interface FormatResult { // NOTE(review): looks like per-format aggregate statistics — confirm
+ format: string
+ accuracy: number // NOTE(review): scale (0-1 vs 0-100) is not visible here — confirm
+ totalTokens: number
+ avgInputTokens: number
+ avgLatency: number // NOTE(review): unit not stated; EvaluationResult uses latencyMs — confirm
+ correctCount: number
+ totalCount: number
+}
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
deleted file mode 100644
index 146fbed..0000000
--- a/docs/benchmarks.md
+++ /dev/null
@@ -1,158 +0,0 @@
-| Example | JSON | TOON | Tokens Saved | Reduction |
-| ------- | ---- | ---- | ------------ | --------- |
-| ๐ค Simple user object | 31 | 18 | 13 | **41.9%** |
-| ๐ท๏ธ User with tags | 48 | 28 | 20 | **41.7%** |
-| ๐ฆ Small product catalog | 117 | 49 | 68 | **58.1%** |
-| ๐ฅ API response with users | 123 | 53 | 70 | **56.9%** |
-| โ๏ธ Nested configuration | 68 | 42 | 26 | **38.2%** |
-| ๐ E-commerce order | 163 | 94 | 69 | **42.3%** |
-| ๐ Analytics data | 209 | 94 | 115 | **55.0%** |
-| ๐ Large dataset (50 records) | 2159 | 762 | 1397 | **64.7%** |
-| **Total** | **2918** | **1140** | **1778** | **60.9%** |
-
-
-View detailed results
-
-### ๐ฆ Small product catalog
-
-**Savings: 68 tokens (58.1% reduction)**
-
-**JSON** (117 tokens):
-
-```json
-{
- "items": [
- {
- "sku": "A1",
- "name": "Widget",
- "qty": 2,
- "price": 9.99
- },
- {
- "sku": "B2",
- "name": "Gadget",
- "qty": 1,
- "price": 14.5
- },
- {
- "sku": "C3",
- "name": "Doohickey",
- "qty": 5,
- "price": 7.25
- }
- ]
-}
-```
-
-**TOON** (49 tokens):
-
-```
-items[3]{sku,name,qty,price}:
- A1,Widget,2,9.99
- B2,Gadget,1,14.5
- C3,Doohickey,5,7.25
-```
-
----
-
-### ๐ฅ API response with users
-
-**Savings: 70 tokens (56.9% reduction)**
-
-**JSON** (123 tokens):
-
-```json
-{
- "users": [
- {
- "id": 1,
- "name": "Alice",
- "email": "alice@example.com",
- "active": true
- },
- {
- "id": 2,
- "name": "Bob",
- "email": "bob@example.com",
- "active": true
- },
- {
- "id": 3,
- "name": "Charlie",
- "email": "charlie@example.com",
- "active": false
- }
- ],
- "total": 3,
- "page": 1
-}
-```
-
-**TOON** (53 tokens):
-
-```
-users[3]{id,name,email,active}:
- 1,Alice,alice@example.com,true
- 2,Bob,bob@example.com,true
- 3,Charlie,charlie@example.com,false
-total: 3
-page: 1
-```
-
----
-
-### ๐ Analytics data
-
-**Savings: 115 tokens (55.0% reduction)**
-
-**JSON** (209 tokens):
-
-```json
-{
- "metrics": [
- {
- "date": "2025-01-01",
- "views": 1234,
- "clicks": 89,
- "conversions": 12
- },
- {
- "date": "2025-01-02",
- "views": 2345,
- "clicks": 156,
- "conversions": 23
- },
- {
- "date": "2025-01-03",
- "views": 1890,
- "clicks": 123,
- "conversions": 18
- },
- {
- "date": "2025-01-04",
- "views": 3456,
- "clicks": 234,
- "conversions": 34
- },
- {
- "date": "2025-01-05",
- "views": 2789,
- "clicks": 178,
- "conversions": 27
- }
- ]
-}
-```
-
-**TOON** (94 tokens):
-
-```
-metrics[5]{date,views,clicks,conversions}:
- 2025-01-01,1234,89,12
- 2025-01-02,2345,156,23
- 2025-01-03,1890,123,18
- 2025-01-04,3456,234,34
- 2025-01-05,2789,178,27
-```
-
-
diff --git a/package.json b/package.json
index 61525de..8f13df7 100644
--- a/package.json
+++ b/package.json
@@ -26,7 +26,7 @@
"dist"
],
"scripts": {
- "automd": "tsx scripts/generate-bench.ts && automd",
+ "automd": "automd",
"build": "tsdown",
"lint": "eslint .",
"lint:fix": "eslint . --fix",
@@ -35,16 +35,16 @@
"release": "bumpp"
},
"devDependencies": {
- "@antfu/eslint-config": "^6.0.0",
+ "@antfu/eslint-config": "^6.1.0",
"@types/node": "^24.9.1",
"automd": "^0.4.2",
"bumpp": "^10.3.1",
"eslint": "^9.38.0",
"gpt-tokenizer": "^3.2.0",
- "tsdown": "^0.15.9",
+ "tsdown": "^0.15.10",
"tsx": "^4.20.6",
"typescript": "^5.9.3",
- "vitest": "^3.2.4"
+ "vitest": "^4.0.3"
},
"pnpm": {
"onlyBuiltDependencies": [
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 20244df..3894eb4 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -9,8 +9,8 @@ importers:
.:
devDependencies:
'@antfu/eslint-config':
- specifier: ^6.0.0
- version: 6.0.0(@vue/compiler-sfc@3.5.22)(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))
+ specifier: ^6.1.0
+ version: 6.1.0(@vue/compiler-sfc@3.5.22)(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))
'@types/node':
specifier: ^24.9.1
version: 24.9.1
@@ -27,8 +27,8 @@ importers:
specifier: ^3.2.0
version: 3.2.0
tsdown:
- specifier: ^0.15.9
- version: 0.15.9(typescript@5.9.3)
+ specifier: ^0.15.10
+ version: 0.15.10(typescript@5.9.3)
tsx:
specifier: ^4.20.6
version: 4.20.6
@@ -36,17 +36,93 @@ importers:
specifier: ^5.9.3
version: 5.9.3
vitest:
- specifier: ^3.2.4
- version: 3.2.4(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
+ specifier: ^4.0.3
+ version: 4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
+
+ benchmarks:
+ devDependencies:
+ '@ai-sdk/anthropic':
+ specifier: ^2.0.37
+ version: 2.0.37(zod@4.1.12)
+ '@ai-sdk/google':
+ specifier: ^2.0.23
+ version: 2.0.23(zod@4.1.12)
+ '@ai-sdk/openai':
+ specifier: ^2.0.53
+ version: 2.0.53(zod@4.1.12)
+ '@ai-sdk/provider':
+ specifier: ^2.0.0
+ version: 2.0.0
+ '@antfu/eslint-config':
+ specifier: ^6.1.0
+ version: 6.1.0(@vue/compiler-sfc@3.5.22)(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))
+ '@faker-js/faker':
+ specifier: ^10.1.0
+ version: 10.1.0
+ ai:
+ specifier: ^5.0.80
+ version: 5.0.80(zod@4.1.12)
+ consola:
+ specifier: ^3.4.2
+ version: 3.4.2
+ csv-stringify:
+ specifier: ^6.6.0
+ version: 6.6.0
+ gpt-tokenizer:
+ specifier: ^3.2.0
+ version: 3.2.0
+ ofetch:
+ specifier: ^1.4.1
+ version: 1.4.1
+ p-map:
+ specifier: ^7.0.3
+ version: 7.0.3
+ yaml:
+ specifier: ^2.8.1
+ version: 2.8.1
packages:
- '@antfu/eslint-config@6.0.0':
- resolution: {integrity: sha512-M2RM+x+hpxpASEZzQh4d5uaUEHn8sYNVlTB+CySpLkDs2rr3QFvRR7KqNdnox/OIPc6YWMsIEnM/XUbQP52nTA==}
+ '@ai-sdk/anthropic@2.0.37':
+ resolution: {integrity: sha512-r2e9BWoobisH9B5b7x3yYG/k9WlsZqa4D94o7gkwktReqrjjv83zNMop4KmlJsh/zBhbsaP8S8SUfiwK+ESxgg==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4.1.8
+
+ '@ai-sdk/gateway@2.0.1':
+ resolution: {integrity: sha512-vPVIbnP35ZnayS937XLo85vynR85fpBQWHCdUweq7apzqFOTU2YkUd4V3msebEHbQ2Zro60ZShDDy9SMiyWTqA==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4.1.8
+
+ '@ai-sdk/google@2.0.23':
+ resolution: {integrity: sha512-VbCnKR+6aWUVLkAiSW5gUEtST7KueEmlt+d6qwDikxlLnFG9pzy59je8MiDVeM5G2tuSXbvZQF78PGIfXDBmow==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4.1.8
+
+ '@ai-sdk/openai@2.0.53':
+ resolution: {integrity: sha512-GIkR3+Fyif516ftXv+YPSPstnAHhcZxNoR2s8uSHhQ1yBT7I7aQYTVwpjAuYoT3GR+TeP50q7onj2/nDRbT2FQ==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4.1.8
+
+ '@ai-sdk/provider-utils@3.0.12':
+ resolution: {integrity: sha512-ZtbdvYxdMoria+2SlNarEk6Hlgyf+zzcznlD55EAl+7VZvJaSg2sqPvwArY7L6TfDEDJsnCq0fdhBSkYo0Xqdg==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4.1.8
+
+ '@ai-sdk/provider@2.0.0':
+ resolution: {integrity: sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA==}
+ engines: {node: '>=18'}
+
+ '@antfu/eslint-config@6.1.0':
+ resolution: {integrity: sha512-m/L9TGvtG3r4tkfq5BY6THz7pk0g6yuJwwA0SkLEDHJJpt0upuABhs8v3SU8yaPtCGUxq8k2QTLMZ3WPg4vSdw==}
hasBin: true
peerDependencies:
'@eslint-react/eslint-plugin': ^2.0.1
- '@next/eslint-plugin-next': ^15.4.0-canary.115
+ '@next/eslint-plugin-next': '>=15.0.0'
'@prettier/plugin-xml': ^3.4.1
'@unocss/eslint-plugin': '>=0.50.0'
astro-eslint-parser: ^1.0.2
@@ -99,20 +175,20 @@ packages:
'@antfu/install-pkg@1.1.0':
resolution: {integrity: sha512-MGQsmw10ZyI+EJo45CdSER4zEb+p31LpDAFp2Z3gkSd1yqVZGi0Ebx++YTEMonJy4oChEMLsxZ64j8FH6sSqtQ==}
- '@babel/generator@7.28.3':
- resolution: {integrity: sha512-3lSpxGgvnmZznmBkCRnVREPUFJv2wrv9iAoFDvADJc0ypmdOxdUtcLeBgBJ6zE0PMeTKnxeQzyk0xTBq4Ep7zw==}
+ '@babel/generator@7.28.5':
+ resolution: {integrity: sha512-3EwLFhZ38J4VyIP6WNtt2kUdW9dokXA9Cr4IVIFHuCpZ3H8/YFOl5JjZHisrn1fATPBmKKqXzDFvh9fUwHz6CQ==}
engines: {node: '>=6.9.0'}
'@babel/helper-string-parser@7.27.1':
resolution: {integrity: sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==}
engines: {node: '>=6.9.0'}
- '@babel/helper-validator-identifier@7.27.1':
- resolution: {integrity: sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==}
+ '@babel/helper-validator-identifier@7.28.5':
+ resolution: {integrity: sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==}
engines: {node: '>=6.9.0'}
- '@babel/parser@7.28.4':
- resolution: {integrity: sha512-yZbBqeM6TkpP9du/I2pUZnJsRMGGvOuIrhjzC1AwHwW+6he4mni6Bp/m8ijn0iOuZuPI2BfkCoSRunpyjnrQKg==}
+ '@babel/parser@7.28.5':
+ resolution: {integrity: sha512-KKBU1VGYR7ORr3At5HAtUQ+TV3SzRCXmA/8OdDZiLDBIZxVyzXuztPjfLd3BV1PRAQGCMWWSHYhL0F8d5uHBDQ==}
engines: {node: '>=6.0.0'}
hasBin: true
@@ -120,8 +196,8 @@ packages:
resolution: {integrity: sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==}
engines: {node: '>=6.9.0'}
- '@babel/types@7.28.4':
- resolution: {integrity: sha512-bkFqkLhh3pMBUQQkpVgWDWq/lqzc2678eUyDlTBhRqhCHFguYYGM0Efga7tYk4TogG/3x0EEl66/OQ+WGbWB/Q==}
+ '@babel/types@7.28.5':
+ resolution: {integrity: sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==}
engines: {node: '>=6.9.0'}
'@clack/core@0.5.0':
@@ -143,10 +219,14 @@ packages:
resolution: {integrity: sha512-YAdE/IJSpwbOTiaURNCKECdAwqrJuFiZhylmesBcIRawtYKnBR2wxPhoIewMg+Yu+QuYvHfJNReWpoxGBKOChA==}
engines: {node: '>=18'}
- '@es-joy/jsdoccomment@0.58.0':
- resolution: {integrity: sha512-smMc5pDht/UVsCD3hhw/a/e/p8m0RdRYiluXToVfd+d4yaQQh7nn9bACjkk6nXJvat7EWPAxuFkMEFfrxeGa3Q==}
+ '@es-joy/jsdoccomment@0.76.0':
+ resolution: {integrity: sha512-g+RihtzFgGTx2WYCuTHbdOXJeAlGnROws0TeALx9ow/ZmOROOZkVg5wp/B44n0WJgI4SQFP1eWM2iRPlU2Y14w==}
engines: {node: '>=20.11.0'}
+ '@es-joy/resolve.exports@1.0.0':
+ resolution: {integrity: sha512-bbrmzsAZ9GA/3oBS6r8PWMtZarEhKHr413hak8ArwMEZ5DtaLErnkcyEWUsXy7urBcmVu/TpDzHPDVM5uIbx9A==}
+ engines: {node: '>=10'}
+
'@esbuild/aix-ppc64@0.25.11':
resolution: {integrity: sha512-Xt1dOL13m8u0WE8iplx9Ibbm+hFAO0GsU2P34UNoDGvZYkY8ifSiy6Zuc1lYxfG7svWE2fzqCUmFp5HCn51gJg==}
engines: {node: '>=18'}
@@ -368,6 +448,10 @@ packages:
resolution: {integrity: sha512-sB5uyeq+dwCWyPi31B2gQlVlo+j5brPlWx4yZBrEaRo/nhdDE8Xke1gsGgtiBdaBTxuTkceLVuVt/pclrasb0A==}
engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
+ '@faker-js/faker@10.1.0':
+ resolution: {integrity: sha512-C3mrr3b5dRVlKPJdfrAXS8+dq+rq8Qm5SNRazca0JKgw1HQERFmrVb0towvMmw5uu8hHKNiQasMaR/tydf3Zsg==}
+ engines: {node: ^20.19.0 || ^22.13.0 || ^23.5.0 || >=24.0.0, npm: '>=10'}
+
'@humanfs/core@0.19.1':
resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
engines: {node: '>=18.18.0'}
@@ -412,6 +496,14 @@ packages:
resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==}
engines: {node: '>= 8'}
+ '@opentelemetry/api@1.9.0':
+ resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==}
+ engines: {node: '>=8.0.0'}
+
+ '@oxc-project/runtime@0.95.0':
+ resolution: {integrity: sha512-qJS5pNepwMGnafO9ayKGz7rfPQgUBuunHpnP1//9Qa0zK3oT3t1EhT+I+pV9MUA+ZKez//OFqxCxf1vijCKb2Q==}
+ engines: {node: ^20.19.0 || >=22.12.0}
+
'@oxc-project/types@0.95.0':
resolution: {integrity: sha512-vACy7vhpMPhjEJhULNxrdR0D943TkA/MigMpJCHmBHvMXxRStRi/dPtTlfQ3uDwWSzRpT8z+7ImjZVf8JWBocQ==}
@@ -700,6 +792,13 @@ packages:
cpu: [x64]
os: [win32]
+ '@sindresorhus/base62@1.0.0':
+ resolution: {integrity: sha512-TeheYy0ILzBEI/CO55CP6zJCSdSWeRtGnHy8U8dWSUH4I68iqTsy7HkMktR4xakThc9jotkPQUXT4ITdbV7cHA==}
+ engines: {node: '>=18'}
+
+ '@standard-schema/spec@1.0.0':
+ resolution: {integrity: sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA==}
+
'@stylistic/eslint-plugin@5.5.0':
resolution: {integrity: sha512-IeZF+8H0ns6prg4VrkhgL+yrvDXWDH2cKchrbh80ejG9dQgZWp10epHMbgRuQvgchLII/lfh6Xn3lu6+6L86Hw==}
engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
@@ -795,12 +894,16 @@ packages:
resolution: {integrity: sha512-tUFMXI4gxzzMXt4xpGJEsBsTox0XbNQ1y94EwlD/CuZwFcQP79xfQqMhau9HsRc/J0cAPA/HZt1dZPtGn9V/7w==}
engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
- '@vitest/eslint-plugin@1.3.23':
- resolution: {integrity: sha512-kp1vjoJTdVf8jWdzr/JpHIPfh3HMR6JBr2p7XuH4YNx0UXmV4XWdgzvCpAmH8yb39Gry31LULiuBcuhyc/OqkQ==}
+ '@vercel/oidc@3.0.3':
+ resolution: {integrity: sha512-yNEQvPcVrK9sIe637+I0jD6leluPxzwJKx/Haw6F4H77CdDsszUn5V3o96LPziXkSNE2B83+Z3mjqGKBK/R6Gg==}
+ engines: {node: '>= 20'}
+
+ '@vitest/eslint-plugin@1.3.25':
+ resolution: {integrity: sha512-7qM/FrA2VyUmrorP0TQ/Oqhn6wsAcktg6euBn0XmpgF0yT2mDxjziu2QLy86i2mOJ41Wtt55z6aUWo+bfmyAeg==}
engines: {node: '>=18'}
peerDependencies:
- eslint: '>= 8.57.0'
- typescript: '>= 5.0.0'
+ eslint: '>=8.57.0'
+ typescript: '>=5.0.0'
vitest: '*'
peerDependenciesMeta:
typescript:
@@ -808,34 +911,34 @@ packages:
vitest:
optional: true
- '@vitest/expect@3.2.4':
- resolution: {integrity: sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==}
+ '@vitest/expect@4.0.3':
+ resolution: {integrity: sha512-v3eSDx/bF25pzar6aEJrrdTXJduEBU3uSGXHslIdGIpJVP8tQQHV6x1ZfzbFQ/bLIomLSbR/2ZCfnaEGkWkiVQ==}
- '@vitest/mocker@3.2.4':
- resolution: {integrity: sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==}
+ '@vitest/mocker@4.0.3':
+ resolution: {integrity: sha512-evZcRspIPbbiJEe748zI2BRu94ThCBE+RkjCpVF8yoVYuTV7hMe+4wLF/7K86r8GwJHSmAPnPbZhpXWWrg1qbA==}
peerDependencies:
msw: ^2.4.9
- vite: ^5.0.0 || ^6.0.0 || ^7.0.0-0
+ vite: ^6.0.0 || ^7.0.0-0
peerDependenciesMeta:
msw:
optional: true
vite:
optional: true
- '@vitest/pretty-format@3.2.4':
- resolution: {integrity: sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==}
+ '@vitest/pretty-format@4.0.3':
+ resolution: {integrity: sha512-N7gly/DRXzxa9w9sbDXwD9QNFYP2hw90LLLGDobPNwiWgyW95GMxsCt29/COIKKh3P7XJICR38PSDePenMBtsw==}
- '@vitest/runner@3.2.4':
- resolution: {integrity: sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==}
+ '@vitest/runner@4.0.3':
+ resolution: {integrity: sha512-1/aK6fPM0lYXWyGKwop2Gbvz1plyTps/HDbIIJXYtJtspHjpXIeB3If07eWpVH4HW7Rmd3Rl+IS/+zEAXrRtXA==}
- '@vitest/snapshot@3.2.4':
- resolution: {integrity: sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==}
+ '@vitest/snapshot@4.0.3':
+ resolution: {integrity: sha512-amnYmvZ5MTjNCP1HZmdeczAPLRD6iOm9+2nMRUGxbe/6sQ0Ymur0NnR9LIrWS8JA3wKE71X25D6ya/3LN9YytA==}
- '@vitest/spy@3.2.4':
- resolution: {integrity: sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==}
+ '@vitest/spy@4.0.3':
+ resolution: {integrity: sha512-82vVL8Cqz7rbXaNUl35V2G7xeNMAjBdNOVaHbrzznT9BmiCiPOzhf0FhU3eP41nP1bLDm/5wWKZqkG4nyU95DQ==}
- '@vitest/utils@3.2.4':
- resolution: {integrity: sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==}
+ '@vitest/utils@4.0.3':
+ resolution: {integrity: sha512-qV6KJkq8W3piW6MDIbGOmn1xhvcW4DuA07alqaQ+vdx7YA49J85pnwnxigZVQFQw3tWnQNRKWwhz5wbP6iv/GQ==}
'@vue/compiler-core@3.5.22':
resolution: {integrity: sha512-jQ0pFPmZwTEiRNSb+i9Ow/I/cHv2tXYqsnHKKyCQ08irI2kdF5qmYedmF8si8mA7zepUFmJ2hqzS8CQmNOWOkQ==}
@@ -862,6 +965,12 @@ packages:
engines: {node: '>=0.4.0'}
hasBin: true
+ ai@5.0.80:
+ resolution: {integrity: sha512-g1o6pjxm1eTtyh295dRhsg0gvZaHFlSo2oruWrK2rIR7KafWEhNB2A2/aJ9hyPT9AMI8JnQJyto1Tl9DMqwc9w==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4.1.8
+
ajv@6.12.6:
resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==}
@@ -898,8 +1007,8 @@ packages:
balanced-match@1.0.2:
resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==}
- baseline-browser-mapping@2.8.19:
- resolution: {integrity: sha512-zoKGUdu6vb2jd3YOq0nnhEDQVbPcHhco3UImJrv5dSkvxTc2pl2WjOPsjZXDwPDSl5eghIMuY3R6J9NDKF3KcQ==}
+ baseline-browser-mapping@2.8.20:
+ resolution: {integrity: sha512-JMWsdF+O8Orq3EMukbUN1QfbLK9mX2CkUmQBcW2T0s8OmdAUL5LLM/6wFwSrqXzlXB13yhyK9gTKS1rIizOduQ==}
hasBin: true
birpc@2.6.1:
@@ -954,8 +1063,8 @@ packages:
ccount@2.0.1:
resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==}
- chai@5.3.3:
- resolution: {integrity: sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==}
+ chai@6.2.0:
+ resolution: {integrity: sha512-aUTnJc/JipRzJrNADXVvpVqi6CO0dn3nx4EVPxijri+fj3LUUDyZQOgVeW54Ob3Y1Xh9Iz8f+CgaCl8v0mn9bA==}
engines: {node: '>=18'}
chalk@4.1.2:
@@ -968,10 +1077,6 @@ packages:
character-entities@2.0.2:
resolution: {integrity: sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==}
- check-error@2.1.1:
- resolution: {integrity: sha512-OAlb+T7V4Op9OwdkjmguYRqncdlx5JiofwOAUkmTF+jNdHwzTaTs4sRAGpzLF3oOz5xAyDGrPgeIDFQmDOTiJw==}
- engines: {node: '>= 16'}
-
chokidar@4.0.3:
resolution: {integrity: sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==}
engines: {node: '>= 14.16.0'}
@@ -1023,6 +1128,9 @@ packages:
engines: {node: '>=4'}
hasBin: true
+ csv-stringify@6.6.0:
+ resolution: {integrity: sha512-YW32lKOmIBgbxtu3g5SaiqWNwa/9ISQt2EcgOq0+RAIFufFp9is6tqNnKahqE5kuKvrnYAzs28r+s6pXJR8Vcw==}
+
debug@4.4.3:
resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==}
engines: {node: '>=6.0'}
@@ -1035,10 +1143,6 @@ packages:
decode-named-character-reference@1.2.0:
resolution: {integrity: sha512-c6fcElNV6ShtZXmsgNgFFV5tVX2PaV4g+MOAkb8eXHvn6sryJBrZa9r0zV6+dtTyoCKxtDy5tyQ5ZwQuidtd+Q==}
- deep-eql@5.0.2:
- resolution: {integrity: sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==}
- engines: {node: '>=6'}
-
deep-is@0.1.4:
resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==}
@@ -1085,8 +1189,8 @@ packages:
oxc-resolver:
optional: true
- electron-to-chromium@1.5.238:
- resolution: {integrity: sha512-khBdc+w/Gv+cS8e/Pbnaw/FXcBUeKrRVik9IxfXtgREOWyJhR4tj43n3amkVogJ/yeQUqzkrZcFhtIxIdqmmcQ==}
+ electron-to-chromium@1.5.240:
+ resolution: {integrity: sha512-OBwbZjWgrCOH+g6uJsA2/7Twpas2OlepS9uvByJjR2datRDuKGYeD+nP8lBBks2qnB7bGJNHDUx7c/YLaT3QMQ==}
empathic@2.0.0:
resolution: {integrity: sha512-i6UzDscO/XfAcNYD75CfICkmfLedpyPDdozrLMmQc5ORaQcdMoc21OnlEylMIqI7U8eniKrPMxxtj8k0vhmJhA==}
@@ -1186,8 +1290,8 @@ packages:
typescript:
optional: true
- eslint-plugin-jsdoc@59.1.0:
- resolution: {integrity: sha512-sg9mzjjzfnMynyY4W8FDiQv3i8eFcKVEHDt4Xh7MLskP3QkMt2z6p7FuzSw7jJSKFues6RaK2GWvmkB1FLPxXg==}
+ eslint-plugin-jsdoc@61.1.9:
+ resolution: {integrity: sha512-X2AzSGbq1CzBRgKcVAu2qzOV9ogqygkUDk5AX6eNK5G+kY3I5Op5E5b99fE+FN0/bGnk2KGcsMIG6ZLF+di69A==}
engines: {node: '>=20.11.0'}
peerDependencies:
eslint: ^7.0.0 || ^8.0.0 || ^9.0.0
@@ -1324,6 +1428,10 @@ packages:
resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==}
engines: {node: '>=0.10.0'}
+ eventsource-parser@3.0.6:
+ resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==}
+ engines: {node: '>=18.0.0'}
+
expect-type@1.2.2:
resolution: {integrity: sha512-JhFGDVJ7tmDJItKhYgJCGLOWjuK9vPxiXoUFLwLDc99NlmklilbiQJwoctZtt13+xMw91MCk/REan6MWHqDjyA==}
engines: {node: '>=12.0.0'}
@@ -1444,6 +1552,9 @@ packages:
hookable@5.5.3:
resolution: {integrity: sha512-Yc+BQe8SvoXH1643Qez1zqLRmbA5rCL+sSmk6TVos0LWVfNIB7PGncdlId77WzLGSIB5KaWgTaNTs2lNVEI6VQ==}
+ html-entities@2.6.0:
+ resolution: {integrity: sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ==}
+
ignore@5.3.2:
resolution: {integrity: sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==}
engines: {node: '>= 4'}
@@ -1487,9 +1598,6 @@ packages:
resolution: {integrity: sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==}
hasBin: true
- js-tokens@9.0.1:
- resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==}
-
js-yaml@4.1.0:
resolution: {integrity: sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==}
hasBin: true
@@ -1502,9 +1610,9 @@ packages:
resolution: {integrity: sha512-iZ8Bdb84lWRuGHamRXFyML07r21pcwBrLkHEuHgEY5UbCouBwv7ECknDRKzsQIXMiqpPymqtIf8TC/shYKB5rw==}
engines: {node: '>=12.0.0'}
- jsdoc-type-pratt-parser@5.4.0:
- resolution: {integrity: sha512-F9GQ+F1ZU6qvSrZV8fNFpjDNf614YzR2eF6S0+XbDjAcUI28FSoXnYZFjQmb1kFx3rrJb5PnxUH3/Yti6fcM+g==}
- engines: {node: '>=12.0.0'}
+ jsdoc-type-pratt-parser@6.10.0:
+ resolution: {integrity: sha512-+LexoTRyYui5iOhJGn13N9ZazL23nAHGkXsa1p/C8yeq79WRfLBag6ZZ0FQG2aRoc9yfo59JT9EYCQonOkHKkQ==}
+ engines: {node: '>=20.0.0'}
jsesc@3.0.2:
resolution: {integrity: sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g==}
@@ -1522,6 +1630,9 @@ packages:
json-schema-traverse@0.4.1:
resolution: {integrity: sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==}
+ json-schema@0.4.0:
+ resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==}
+
json-stable-stringify-without-jsonify@1.0.1:
resolution: {integrity: sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==}
@@ -1562,11 +1673,8 @@ packages:
longest-streak@3.1.0:
resolution: {integrity: sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==}
- loupe@3.2.1:
- resolution: {integrity: sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==}
-
- magic-string@0.30.19:
- resolution: {integrity: sha512-2N21sPY9Ws53PZvsEpVtNuSW+ScYbQdp4b9qUaL+9QkHUrGFKo56Lg9Emg5s9V/qrtNBmiR01sYhUOwu3H+VOw==}
+ magic-string@0.30.21:
+ resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==}
markdown-table@3.0.4:
resolution: {integrity: sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==}
@@ -1750,8 +1858,8 @@ packages:
engines: {node: ^14.16.0 || >=16.10.0}
hasBin: true
- object-deep-merge@1.0.5:
- resolution: {integrity: sha512-3DioFgOzetbxbeUq8pB2NunXo8V0n4EvqsWM/cJoI6IA9zghd7cl/2pBOuWRf4dlvA+fcg5ugFMZaN2/RuoaGg==}
+ object-deep-merge@2.0.0:
+ resolution: {integrity: sha512-3DC3UMpeffLTHiuXSy/UG4NOIYTLlY9u3V82+djSCLYClWobZiS4ivYzpIUWrRY/nfsJ8cWsKyG3QfyLePmhvg==}
ofetch@1.4.1:
resolution: {integrity: sha512-QZj2DfGplQAr2oj9KzceK9Hwz6Whxazmn85yYeVuS3u9XTMOGMRx0kO95MQ+vLsj/S/NwBDMMLU5hpxvI6Tklw==}
@@ -1771,6 +1879,10 @@ packages:
resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==}
engines: {node: '>=10'}
+ p-map@7.0.3:
+ resolution: {integrity: sha512-VkndIv2fIB99swvQoA65bm+fsmt6UNdGeIB0oxBs+WhAhdh08QA04JXpI7rbB9r08/nkbysKoya9rtDERYOYMA==}
+ engines: {node: '>=18'}
+
package-manager-detector@1.5.0:
resolution: {integrity: sha512-uBj69dVlYe/+wxj8JOpr97XfsxH/eumMt6HqjNTmJDf/6NO9s+0uxeOneIz3AsPt2m6y9PqzDzd3ATcU17MNfw==}
@@ -1799,10 +1911,6 @@ packages:
pathe@2.0.3:
resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==}
- pathval@2.0.1:
- resolution: {integrity: sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==}
- engines: {node: '>= 14.16'}
-
perfect-debounce@2.0.0:
resolution: {integrity: sha512-fkEH/OBiKrqqI/yIgjR92lMfs2K8105zt/VT6+7eTjNwisrsh47CeIED9z58zI7DfKdH3uHAn25ziRZn3kgAow==}
@@ -1875,6 +1983,10 @@ packages:
resolution: {integrity: sha512-cnE+y8bz4NhMjISKbgeVJtqNbtf5QpjZP+Bslo+UqkIt9QPnX9q095eiRRASJG1/tz6dlNr6Z5NsBiWYokp6EQ==}
hasBin: true
+ reserved-identifiers@1.2.0:
+ resolution: {integrity: sha512-yE7KUfFvaBFzGPs5H3Ops1RevfUEsDc5Iz65rOwWg4lE8HJSYtle77uul3+573457oHvBKuHYDl/xqUkKpEEdw==}
+ engines: {node: '>=18'}
+
resolve-from@4.0.0:
resolution: {integrity: sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==}
engines: {node: '>=4'}
@@ -1886,13 +1998,13 @@ packages:
resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==}
engines: {iojs: '>=1.0.0', node: '>=0.10.0'}
- rolldown-plugin-dts@0.16.12:
- resolution: {integrity: sha512-9dGjm5oqtKcbZNhpzyBgb8KrYiU616A7IqcFWG7Msp1RKAXQ/hapjivRg+g5IYWSiFhnk3OKYV5T4Ft1t8Cczg==}
+ rolldown-plugin-dts@0.17.1:
+ resolution: {integrity: sha512-dQfoYD9kwSau7UQPg0UubprCDcwWeEKYd9SU9O2MpOdKy3VHy3/DaDF+x6w9+KE/w6J8qxkHVjwG1K2QmmQAFA==}
engines: {node: '>=20.18.0'}
peerDependencies:
'@ts-macro/tsc': ^0.3.6
'@typescript/native-preview': '>=7.0.0-dev.20250601.1'
- rolldown: ^1.0.0-beta.9
+ rolldown: ^1.0.0-beta.44
typescript: ^5.0.0
vue-tsc: ~3.1.0
peerDependenciesMeta:
@@ -1971,9 +2083,6 @@ packages:
resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==}
engines: {node: '>=8'}
- strip-literal@3.1.0:
- resolution: {integrity: sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==}
-
supports-color@7.2.0:
resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==}
engines: {node: '>=8'}
@@ -1999,22 +2108,18 @@ packages:
resolution: {integrity: sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==}
engines: {node: '>=12.0.0'}
- tinypool@1.1.1:
- resolution: {integrity: sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==}
- engines: {node: ^18.0.0 || >=20.0.0}
-
- tinyrainbow@2.0.0:
- resolution: {integrity: sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==}
- engines: {node: '>=14.0.0'}
-
- tinyspy@4.0.4:
- resolution: {integrity: sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==}
+ tinyrainbow@3.0.3:
+ resolution: {integrity: sha512-PSkbLUoxOFRzJYjjxHJt9xro7D+iilgMX/C9lawzVuYiIdcihh9DXmVibBe8lmcFrRi/VzlPjBxbN7rH24q8/Q==}
engines: {node: '>=14.0.0'}
to-regex-range@5.0.1:
resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==}
engines: {node: '>=8.0'}
+ to-valid-identifier@1.0.0:
+ resolution: {integrity: sha512-41wJyvKep3yT2tyPqX/4blcfybknGB4D+oETKLs7Q76UiPqRpUJK3hr1nxelyYO0PHKVzJwlu0aCeEAsGI6rpw==}
+ engines: {node: '>=20'}
+
toml-eslint-parser@0.10.0:
resolution: {integrity: sha512-khrZo4buq4qVmsGzS5yQjKe/WsFvV8fGfOjDQN0q4iy9FjRfPWRgTFrU8u1R2iu/SfWLhY9WnCi4Jhdrcbtg+g==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
@@ -2034,8 +2139,8 @@ packages:
peerDependencies:
typescript: '>=4.0.0'
- tsdown@0.15.9:
- resolution: {integrity: sha512-C0EJYpXIYdlJokTumIL4lmv/wEiB20oa6iiYsXFE7Q0VKF3Ju6TQ7XAn4JQdm+2iQGEfl8cnEKcX5DB7iVR5Dw==}
+ tsdown@0.15.10:
+ resolution: {integrity: sha512-8zbSN4GW7ZzhjIYl/rWrruGzl1cJiDtAjb8l5XVF2cVme1+aDLVcExw+Ph4gNcfdGg6ZfYPh5kmcpIfh5xHisw==}
engines: {node: '>=20.19.0'}
hasBin: true
peerDependencies:
@@ -2068,10 +2173,6 @@ packages:
resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==}
engines: {node: '>= 0.8.0'}
- type-fest@4.2.0:
- resolution: {integrity: sha512-5zknd7Dss75pMSED270A1RQS3KloqRJA9XbXLe0eCxyw7xXFb3rd+9B0UQ/0E+LQT6lnrLviEolYORlRWamn4w==}
- engines: {node: '>=16'}
-
typescript@5.9.3:
resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==}
engines: {node: '>=14.17'}
@@ -2098,6 +2199,11 @@ packages:
unist-util-visit@5.0.0:
resolution: {integrity: sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==}
+ unrun@0.2.0:
+ resolution: {integrity: sha512-iaCxWG/6kmjP3wUTBheowjFm6LuI8fd/A3Uz7DbMoz8HvQsJThh7tWZKWJfVltOSK3LuIJFzepr7g6fbuhUasw==}
+ engines: {node: '>=20.19.0'}
+ hasBin: true
+
untyped@2.0.0:
resolution: {integrity: sha512-nwNCjxJTjNuLCgFr42fEak5OcLuB3ecca+9ksPFNvtfYSLpjf+iJqSIaSnIile6ZPbKYxI5k2AfXqeopGudK/g==}
hasBin: true
@@ -2114,13 +2220,8 @@ packages:
util-deprecate@1.0.2:
resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==}
- vite-node@3.2.4:
- resolution: {integrity: sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==}
- engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0}
- hasBin: true
-
- vite@7.1.11:
- resolution: {integrity: sha512-uzcxnSDVjAopEUjljkWh8EIrg6tlzrjFUfMcR1EVsRDGwf/ccef0qQPRyOrROwhrTDaApueq+ja+KLPlzR/zdg==}
+ vite@7.1.12:
+ resolution: {integrity: sha512-ZWyE8YXEXqJrrSLvYgrRP7p62OziLW7xI5HYGWFzOvupfAlrLvURSzv/FyGyy0eidogEM3ujU+kUG1zuHgb6Ug==}
engines: {node: ^20.19.0 || >=22.12.0}
hasBin: true
peerDependencies:
@@ -2159,16 +2260,18 @@ packages:
yaml:
optional: true
- vitest@3.2.4:
- resolution: {integrity: sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==}
- engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0}
+ vitest@4.0.3:
+ resolution: {integrity: sha512-IUSop8jgaT7w0g1yOM/35qVtKjr/8Va4PrjzH1OUb0YH4c3OXB2lCZDkMAB6glA8T5w8S164oJGsbcmAecr4sA==}
+ engines: {node: ^20.0.0 || ^22.0.0 || >=24.0.0}
hasBin: true
peerDependencies:
'@edge-runtime/vm': '*'
'@types/debug': ^4.1.12
- '@types/node': ^18.0.0 || ^20.0.0 || >=22.0.0
- '@vitest/browser': 3.2.4
- '@vitest/ui': 3.2.4
+ '@types/node': ^20.0.0 || ^22.0.0 || >=24.0.0
+ '@vitest/browser-playwright': 4.0.3
+ '@vitest/browser-preview': 4.0.3
+ '@vitest/browser-webdriverio': 4.0.3
+ '@vitest/ui': 4.0.3
happy-dom: '*'
jsdom: '*'
peerDependenciesMeta:
@@ -2178,7 +2281,11 @@ packages:
optional: true
'@types/node':
optional: true
- '@vitest/browser':
+ '@vitest/browser-playwright':
+ optional: true
+ '@vitest/browser-preview':
+ optional: true
+ '@vitest/browser-webdriverio':
optional: true
'@vitest/ui':
optional: true
@@ -2224,12 +2331,51 @@ packages:
resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==}
engines: {node: '>=10'}
+ zod@4.1.12:
+ resolution: {integrity: sha512-JInaHOamG8pt5+Ey8kGmdcAcg3OL9reK8ltczgHTAwNhMys/6ThXHityHxVV2p3fkw/c+MAvBHFVYHFZDmjMCQ==}
+
zwitch@2.0.4:
resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==}
snapshots:
- '@antfu/eslint-config@6.0.0(@vue/compiler-sfc@3.5.22)(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))':
+ '@ai-sdk/anthropic@2.0.37(zod@4.1.12)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@ai-sdk/provider-utils': 3.0.12(zod@4.1.12)
+ zod: 4.1.12
+
+ '@ai-sdk/gateway@2.0.1(zod@4.1.12)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@ai-sdk/provider-utils': 3.0.12(zod@4.1.12)
+ '@vercel/oidc': 3.0.3
+ zod: 4.1.12
+
+ '@ai-sdk/google@2.0.23(zod@4.1.12)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@ai-sdk/provider-utils': 3.0.12(zod@4.1.12)
+ zod: 4.1.12
+
+ '@ai-sdk/openai@2.0.53(zod@4.1.12)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@ai-sdk/provider-utils': 3.0.12(zod@4.1.12)
+ zod: 4.1.12
+
+ '@ai-sdk/provider-utils@3.0.12(zod@4.1.12)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@standard-schema/spec': 1.0.0
+ eventsource-parser: 3.0.6
+ zod: 4.1.12
+
+ '@ai-sdk/provider@2.0.0':
+ dependencies:
+ json-schema: 0.4.0
+
+ '@antfu/eslint-config@6.1.0(@vue/compiler-sfc@3.5.22)(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))':
dependencies:
'@antfu/install-pkg': 1.1.0
'@clack/prompts': 0.11.0
@@ -2238,7 +2384,7 @@ snapshots:
'@stylistic/eslint-plugin': 5.5.0(eslint@9.38.0(jiti@2.6.1))
'@typescript-eslint/eslint-plugin': 8.46.2(@typescript-eslint/parser@8.46.2(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3))(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)
'@typescript-eslint/parser': 8.46.2(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)
- '@vitest/eslint-plugin': 1.3.23(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))
+ '@vitest/eslint-plugin': 1.3.25(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))
ansis: 4.2.0
cac: 6.7.14
eslint: 9.38.0(jiti@2.6.1)
@@ -2248,7 +2394,7 @@ snapshots:
eslint-plugin-antfu: 3.1.1(eslint@9.38.0(jiti@2.6.1))
eslint-plugin-command: 3.3.1(eslint@9.38.0(jiti@2.6.1))
eslint-plugin-import-lite: 0.3.0(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)
- eslint-plugin-jsdoc: 59.1.0(eslint@9.38.0(jiti@2.6.1))
+ eslint-plugin-jsdoc: 61.1.9(eslint@9.38.0(jiti@2.6.1))
eslint-plugin-jsonc: 2.21.0(eslint@9.38.0(jiti@2.6.1))
eslint-plugin-n: 17.23.1(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)
eslint-plugin-no-only-tests: 3.3.0
@@ -2280,28 +2426,28 @@ snapshots:
package-manager-detector: 1.5.0
tinyexec: 1.0.1
- '@babel/generator@7.28.3':
+ '@babel/generator@7.28.5':
dependencies:
- '@babel/parser': 7.28.4
- '@babel/types': 7.28.4
+ '@babel/parser': 7.28.5
+ '@babel/types': 7.28.5
'@jridgewell/gen-mapping': 0.3.13
'@jridgewell/trace-mapping': 0.3.31
jsesc: 3.1.0
'@babel/helper-string-parser@7.27.1': {}
- '@babel/helper-validator-identifier@7.27.1': {}
+ '@babel/helper-validator-identifier@7.28.5': {}
- '@babel/parser@7.28.4':
+ '@babel/parser@7.28.5':
dependencies:
- '@babel/types': 7.28.4
+ '@babel/types': 7.28.5
'@babel/runtime@7.28.4': {}
- '@babel/types@7.28.4':
+ '@babel/types@7.28.5':
dependencies:
'@babel/helper-string-parser': 7.27.1
- '@babel/helper-validator-identifier': 7.27.1
+ '@babel/helper-validator-identifier': 7.28.5
'@clack/core@0.5.0':
dependencies:
@@ -2338,13 +2484,15 @@ snapshots:
esquery: 1.6.0
jsdoc-type-pratt-parser: 4.1.0
- '@es-joy/jsdoccomment@0.58.0':
+ '@es-joy/jsdoccomment@0.76.0':
dependencies:
'@types/estree': 1.0.8
'@typescript-eslint/types': 8.46.2
comment-parser: 1.4.1
esquery: 1.6.0
- jsdoc-type-pratt-parser: 5.4.0
+ jsdoc-type-pratt-parser: 6.10.0
+
+ '@es-joy/resolve.exports@1.0.0': {}
'@esbuild/aix-ppc64@0.25.11':
optional: true
@@ -2505,6 +2653,8 @@ snapshots:
'@eslint/core': 0.16.0
levn: 0.4.1
+ '@faker-js/faker@10.1.0': {}
+
'@humanfs/core@0.19.1': {}
'@humanfs/node@0.16.7':
@@ -2549,6 +2699,10 @@ snapshots:
'@nodelib/fs.scandir': 2.1.5
fastq: 1.19.1
+ '@opentelemetry/api@1.9.0': {}
+
+ '@oxc-project/runtime@0.95.0': {}
+
'@oxc-project/types@0.95.0': {}
'@parcel/watcher-android-arm64@2.5.1':
@@ -2729,6 +2883,10 @@ snapshots:
'@rollup/rollup-win32-x64-msvc@4.52.5':
optional: true
+ '@sindresorhus/base62@1.0.0': {}
+
+ '@standard-schema/spec@1.0.0': {}
+
'@stylistic/eslint-plugin@5.5.0(eslint@9.38.0(jiti@2.6.1))':
dependencies:
'@eslint-community/eslint-utils': 4.9.0(eslint@9.38.0(jiti@2.6.1))
@@ -2864,62 +3022,61 @@ snapshots:
'@typescript-eslint/types': 8.46.2
eslint-visitor-keys: 4.2.1
- '@vitest/eslint-plugin@1.3.23(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))':
+ '@vercel/oidc@3.0.3': {}
+
+ '@vitest/eslint-plugin@1.3.25(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))':
dependencies:
'@typescript-eslint/scope-manager': 8.46.2
'@typescript-eslint/utils': 8.46.2(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)
eslint: 9.38.0(jiti@2.6.1)
optionalDependencies:
typescript: 5.9.3
- vitest: 3.2.4(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
+ vitest: 4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
transitivePeerDependencies:
- supports-color
- '@vitest/expect@3.2.4':
+ '@vitest/expect@4.0.3':
dependencies:
+ '@standard-schema/spec': 1.0.0
'@types/chai': 5.2.3
- '@vitest/spy': 3.2.4
- '@vitest/utils': 3.2.4
- chai: 5.3.3
- tinyrainbow: 2.0.0
+ '@vitest/spy': 4.0.3
+ '@vitest/utils': 4.0.3
+ chai: 6.2.0
+ tinyrainbow: 3.0.3
- '@vitest/mocker@3.2.4(vite@7.1.11(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))':
+ '@vitest/mocker@4.0.3(vite@7.1.12(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))':
dependencies:
- '@vitest/spy': 3.2.4
+ '@vitest/spy': 4.0.3
estree-walker: 3.0.3
- magic-string: 0.30.19
+ magic-string: 0.30.21
optionalDependencies:
- vite: 7.1.11(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
+ vite: 7.1.12(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
- '@vitest/pretty-format@3.2.4':
+ '@vitest/pretty-format@4.0.3':
dependencies:
- tinyrainbow: 2.0.0
+ tinyrainbow: 3.0.3
- '@vitest/runner@3.2.4':
+ '@vitest/runner@4.0.3':
dependencies:
- '@vitest/utils': 3.2.4
- pathe: 2.0.3
- strip-literal: 3.1.0
-
- '@vitest/snapshot@3.2.4':
- dependencies:
- '@vitest/pretty-format': 3.2.4
- magic-string: 0.30.19
+ '@vitest/utils': 4.0.3
pathe: 2.0.3
- '@vitest/spy@3.2.4':
+ '@vitest/snapshot@4.0.3':
dependencies:
- tinyspy: 4.0.4
+ '@vitest/pretty-format': 4.0.3
+ magic-string: 0.30.21
+ pathe: 2.0.3
- '@vitest/utils@3.2.4':
+ '@vitest/spy@4.0.3': {}
+
+ '@vitest/utils@4.0.3':
dependencies:
- '@vitest/pretty-format': 3.2.4
- loupe: 3.2.1
- tinyrainbow: 2.0.0
+ '@vitest/pretty-format': 4.0.3
+ tinyrainbow: 3.0.3
'@vue/compiler-core@3.5.22':
dependencies:
- '@babel/parser': 7.28.4
+ '@babel/parser': 7.28.5
'@vue/shared': 3.5.22
entities: 4.5.0
estree-walker: 2.0.2
@@ -2932,13 +3089,13 @@ snapshots:
'@vue/compiler-sfc@3.5.22':
dependencies:
- '@babel/parser': 7.28.4
+ '@babel/parser': 7.28.5
'@vue/compiler-core': 3.5.22
'@vue/compiler-dom': 3.5.22
'@vue/compiler-ssr': 3.5.22
'@vue/shared': 3.5.22
estree-walker: 2.0.2
- magic-string: 0.30.19
+ magic-string: 0.30.21
postcss: 8.5.6
source-map-js: 1.2.1
@@ -2955,6 +3112,14 @@ snapshots:
acorn@8.15.0: {}
+ ai@5.0.80(zod@4.1.12):
+ dependencies:
+ '@ai-sdk/gateway': 2.0.1(zod@4.1.12)
+ '@ai-sdk/provider': 2.0.0
+ '@ai-sdk/provider-utils': 3.0.12(zod@4.1.12)
+ '@opentelemetry/api': 1.9.0
+ zod: 4.1.12
+
ajv@6.12.6:
dependencies:
fast-deep-equal: 3.1.3
@@ -2978,7 +3143,7 @@ snapshots:
ast-kit@2.1.3:
dependencies:
- '@babel/parser': 7.28.4
+ '@babel/parser': 7.28.5
pathe: 2.0.3
automd@0.4.2:
@@ -2990,7 +3155,7 @@ snapshots:
defu: 6.1.4
destr: 2.0.5
didyoumean2: 7.0.4
- magic-string: 0.30.19
+ magic-string: 0.30.21
mdbox: 0.1.1
mlly: 1.8.0
ofetch: 1.4.1
@@ -3005,7 +3170,7 @@ snapshots:
balanced-match@1.0.2: {}
- baseline-browser-mapping@2.8.19: {}
+ baseline-browser-mapping@2.8.20: {}
birpc@2.6.1: {}
@@ -3026,9 +3191,9 @@ snapshots:
browserslist@4.27.0:
dependencies:
- baseline-browser-mapping: 2.8.19
+ baseline-browser-mapping: 2.8.20
caniuse-lite: 1.0.30001751
- electron-to-chromium: 1.5.238
+ electron-to-chromium: 1.5.240
node-releases: 2.0.26
update-browserslist-db: 1.1.4(browserslist@4.27.0)
@@ -3073,13 +3238,7 @@ snapshots:
ccount@2.0.1: {}
- chai@5.3.3:
- dependencies:
- assertion-error: 2.0.1
- check-error: 2.1.1
- deep-eql: 5.0.2
- loupe: 3.2.1
- pathval: 2.0.1
+ chai@6.2.0: {}
chalk@4.1.2:
dependencies:
@@ -3090,8 +3249,6 @@ snapshots:
character-entities@2.0.2: {}
- check-error@2.1.1: {}
-
chokidar@4.0.3:
dependencies:
readdirp: 4.1.2
@@ -3134,6 +3291,8 @@ snapshots:
cssesc@3.0.0: {}
+ csv-stringify@6.6.0: {}
+
debug@4.4.3:
dependencies:
ms: 2.1.3
@@ -3142,8 +3301,6 @@ snapshots:
dependencies:
character-entities: 2.0.2
- deep-eql@5.0.2: {}
-
deep-is@0.1.4: {}
defu@6.1.4: {}
@@ -3172,7 +3329,7 @@ snapshots:
dts-resolver@2.1.2: {}
- electron-to-chromium@1.5.238: {}
+ electron-to-chromium@1.5.240: {}
empathic@2.0.0: {}
@@ -3275,9 +3432,10 @@ snapshots:
optionalDependencies:
typescript: 5.9.3
- eslint-plugin-jsdoc@59.1.0(eslint@9.38.0(jiti@2.6.1)):
+ eslint-plugin-jsdoc@61.1.9(eslint@9.38.0(jiti@2.6.1)):
dependencies:
- '@es-joy/jsdoccomment': 0.58.0
+ '@es-joy/jsdoccomment': 0.76.0
+ '@es-joy/resolve.exports': 1.0.0
are-docs-informative: 0.0.2
comment-parser: 1.4.1
debug: 4.4.3
@@ -3285,10 +3443,12 @@ snapshots:
eslint: 9.38.0(jiti@2.6.1)
espree: 10.4.0
esquery: 1.6.0
- object-deep-merge: 1.0.5
+ html-entities: 2.6.0
+ object-deep-merge: 2.0.0
parse-imports-exports: 0.2.4
semver: 7.7.3
spdx-expression-parse: 4.0.0
+ to-valid-identifier: 1.0.0
transitivePeerDependencies:
- supports-color
@@ -3367,7 +3527,7 @@ snapshots:
eslint-plugin-unicorn@61.0.2(eslint@9.38.0(jiti@2.6.1)):
dependencies:
- '@babel/helper-validator-identifier': 7.27.1
+ '@babel/helper-validator-identifier': 7.28.5
'@eslint-community/eslint-utils': 4.9.0(eslint@9.38.0(jiti@2.6.1))
'@eslint/plugin-kit': 0.3.5
change-case: 5.4.4
@@ -3504,6 +3664,8 @@ snapshots:
esutils@2.0.3: {}
+ eventsource-parser@3.0.6: {}
+
expect-type@1.2.2: {}
exsolve@1.0.7: {}
@@ -3604,6 +3766,8 @@ snapshots:
hookable@5.5.3: {}
+ html-entities@2.6.0: {}
+
ignore@5.3.2: {}
ignore@7.0.5: {}
@@ -3633,8 +3797,6 @@ snapshots:
jiti@2.6.1: {}
- js-tokens@9.0.1: {}
-
js-yaml@4.1.0:
dependencies:
argparse: 2.0.1
@@ -3643,7 +3805,7 @@ snapshots:
jsdoc-type-pratt-parser@4.8.0: {}
- jsdoc-type-pratt-parser@5.4.0: {}
+ jsdoc-type-pratt-parser@6.10.0: {}
jsesc@3.0.2: {}
@@ -3653,6 +3815,8 @@ snapshots:
json-schema-traverse@0.4.1: {}
+ json-schema@0.4.0: {}
+
json-stable-stringify-without-jsonify@1.0.1: {}
jsonc-eslint-parser@2.4.1:
@@ -3693,9 +3857,7 @@ snapshots:
longest-streak@3.1.0: {}
- loupe@3.2.1: {}
-
- magic-string@0.30.19:
+ magic-string@0.30.21:
dependencies:
'@jridgewell/sourcemap-codec': 1.5.5
@@ -4066,9 +4228,7 @@ snapshots:
pkg-types: 2.3.0
tinyexec: 1.0.1
- object-deep-merge@1.0.5:
- dependencies:
- type-fest: 4.2.0
+ object-deep-merge@2.0.0: {}
ofetch@1.4.1:
dependencies:
@@ -4095,6 +4255,8 @@ snapshots:
dependencies:
p-limit: 3.1.0
+ p-map@7.0.3: {}
+
package-manager-detector@1.5.0: {}
parent-module@1.0.1:
@@ -4115,8 +4277,6 @@ snapshots:
pathe@2.0.3: {}
- pathval@2.0.1: {}
-
perfect-debounce@2.0.0: {}
picocolors@1.1.1: {}
@@ -4184,23 +4344,25 @@ snapshots:
dependencies:
jsesc: 3.0.2
+ reserved-identifiers@1.2.0: {}
+
resolve-from@4.0.0: {}
resolve-pkg-maps@1.0.0: {}
reusify@1.1.0: {}
- rolldown-plugin-dts@0.16.12(rolldown@1.0.0-beta.44)(typescript@5.9.3):
+ rolldown-plugin-dts@0.17.1(rolldown@1.0.0-beta.44)(typescript@5.9.3):
dependencies:
- '@babel/generator': 7.28.3
- '@babel/parser': 7.28.4
- '@babel/types': 7.28.4
+ '@babel/generator': 7.28.5
+ '@babel/parser': 7.28.5
+ '@babel/types': 7.28.5
ast-kit: 2.1.3
birpc: 2.6.1
debug: 4.4.3
dts-resolver: 2.1.2
get-tsconfig: 4.13.0
- magic-string: 0.30.19
+ magic-string: 0.30.21
rolldown: 1.0.0-beta.44
optionalDependencies:
typescript: 5.9.3
@@ -4299,10 +4461,6 @@ snapshots:
strip-json-comments@3.1.1: {}
- strip-literal@3.1.0:
- dependencies:
- js-tokens: 9.0.1
-
supports-color@7.2.0:
dependencies:
has-flag: 4.0.0
@@ -4324,16 +4482,17 @@ snapshots:
fdir: 6.5.0(picomatch@4.0.3)
picomatch: 4.0.3
- tinypool@1.1.1: {}
-
- tinyrainbow@2.0.0: {}
-
- tinyspy@4.0.4: {}
+ tinyrainbow@3.0.3: {}
to-regex-range@5.0.1:
dependencies:
is-number: 7.0.0
+ to-valid-identifier@1.0.0:
+ dependencies:
+ '@sindresorhus/base62': 1.0.0
+ reserved-identifiers: 1.2.0
+
toml-eslint-parser@0.10.0:
dependencies:
eslint-visitor-keys: 3.4.3
@@ -4349,7 +4508,7 @@ snapshots:
picomatch: 4.0.3
typescript: 5.9.3
- tsdown@0.15.9(typescript@5.9.3):
+ tsdown@0.15.10(typescript@5.9.3):
dependencies:
ansis: 4.2.0
cac: 6.7.14
@@ -4359,12 +4518,13 @@ snapshots:
empathic: 2.0.0
hookable: 5.5.3
rolldown: 1.0.0-beta.44
- rolldown-plugin-dts: 0.16.12(rolldown@1.0.0-beta.44)(typescript@5.9.3)
+ rolldown-plugin-dts: 0.17.1(rolldown@1.0.0-beta.44)(typescript@5.9.3)
semver: 7.7.3
tinyexec: 1.0.1
tinyglobby: 0.2.15
tree-kill: 1.2.2
unconfig: 7.3.3
+ unrun: 0.2.0
optionalDependencies:
typescript: 5.9.3
transitivePeerDependencies:
@@ -4388,8 +4548,6 @@ snapshots:
dependencies:
prelude-ls: 1.2.1
- type-fest@4.2.0: {}
-
typescript@5.9.3: {}
ufo@1.6.1: {}
@@ -4422,6 +4580,12 @@ snapshots:
unist-util-is: 6.0.1
unist-util-visit-parents: 6.0.2
+ unrun@0.2.0:
+ dependencies:
+ '@oxc-project/runtime': 0.95.0
+ rolldown: 1.0.0-beta.44
+ synckit: 0.11.11
+
untyped@2.0.0:
dependencies:
citty: 0.1.6
@@ -4442,28 +4606,7 @@ snapshots:
util-deprecate@1.0.2: {}
- vite-node@3.2.4(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1):
- dependencies:
- cac: 6.7.14
- debug: 4.4.3
- es-module-lexer: 1.7.0
- pathe: 2.0.3
- vite: 7.1.11(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
- transitivePeerDependencies:
- - '@types/node'
- - jiti
- - less
- - lightningcss
- - sass
- - sass-embedded
- - stylus
- - sugarss
- - supports-color
- - terser
- - tsx
- - yaml
-
- vite@7.1.11(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1):
+ vite@7.1.12(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1):
dependencies:
esbuild: 0.25.11
fdir: 6.5.0(picomatch@4.0.3)
@@ -4478,30 +4621,27 @@ snapshots:
tsx: 4.20.6
yaml: 2.8.1
- vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1):
+ vitest@4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1):
dependencies:
- '@types/chai': 5.2.3
- '@vitest/expect': 3.2.4
- '@vitest/mocker': 3.2.4(vite@7.1.11(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))
- '@vitest/pretty-format': 3.2.4
- '@vitest/runner': 3.2.4
- '@vitest/snapshot': 3.2.4
- '@vitest/spy': 3.2.4
- '@vitest/utils': 3.2.4
- chai: 5.3.3
+ '@vitest/expect': 4.0.3
+ '@vitest/mocker': 4.0.3(vite@7.1.12(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))
+ '@vitest/pretty-format': 4.0.3
+ '@vitest/runner': 4.0.3
+ '@vitest/snapshot': 4.0.3
+ '@vitest/spy': 4.0.3
+ '@vitest/utils': 4.0.3
debug: 4.4.3
+ es-module-lexer: 1.7.0
expect-type: 1.2.2
- magic-string: 0.30.19
+ magic-string: 0.30.21
pathe: 2.0.3
picomatch: 4.0.3
std-env: 3.10.0
tinybench: 2.9.0
tinyexec: 0.3.2
tinyglobby: 0.2.15
- tinypool: 1.1.1
- tinyrainbow: 2.0.0
- vite: 7.1.11(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
- vite-node: 3.2.4(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
+ tinyrainbow: 3.0.3
+ vite: 7.1.12(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1)
why-is-node-running: 2.3.0
optionalDependencies:
'@types/debug': 4.1.12
@@ -4554,4 +4694,6 @@ snapshots:
yocto-queue@0.1.0: {}
+ zod@4.1.12: {}
+
zwitch@2.0.4: {}
diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
new file mode 100644
index 0000000..76137e2
--- /dev/null
+++ b/pnpm-workspace.yaml
@@ -0,0 +1,2 @@
+packages:
+ - benchmarks
diff --git a/scripts/generate-bench.ts b/scripts/generate-bench.ts
deleted file mode 100644
index a4d8950..0000000
--- a/scripts/generate-bench.ts
+++ /dev/null
@@ -1,213 +0,0 @@
-import * as fsp from 'node:fs/promises'
-import * as path from 'node:path'
-import * as url from 'node:url'
-import { encode } from 'gpt-tokenizer' // o200k_base encoding (default)
-import { encode as encodeToon } from '../src/index'
-
-interface BenchmarkResult {
- name: string
- emoji: string
- jsonTokens: number
- toonTokens: number
- savings: number
- savingsPercent: string
-}
-
-const rootDir = url.fileURLToPath(new URL('../', import.meta.url))
-const benchPath = path.join(rootDir, 'docs', 'benchmarks.md')
-
-const BENCHMARK_EXAMPLES = [
- {
- name: 'Simple user object',
- emoji: '๐ค',
- data: {
- id: 123,
- name: 'Alice',
- email: 'alice@example.com',
- active: true,
- },
- },
- {
- name: 'User with tags',
- emoji: '๐ท๏ธ',
- data: {
- user: {
- id: 123,
- name: 'Ada',
- tags: ['reading', 'gaming', 'coding'],
- active: true,
- },
- },
- },
- {
- name: 'Small product catalog',
- emoji: '๐ฆ',
- data: {
- items: [
- { sku: 'A1', name: 'Widget', qty: 2, price: 9.99 },
- { sku: 'B2', name: 'Gadget', qty: 1, price: 14.5 },
- { sku: 'C3', name: 'Doohickey', qty: 5, price: 7.25 },
- ],
- },
- },
- {
- name: 'API response with users',
- emoji: '๐ฅ',
- data: {
- users: [
- { id: 1, name: 'Alice', email: 'alice@example.com', active: true },
- { id: 2, name: 'Bob', email: 'bob@example.com', active: true },
- { id: 3, name: 'Charlie', email: 'charlie@example.com', active: false },
- ],
- total: 3,
- page: 1,
- },
- },
- {
- name: 'Nested configuration',
- emoji: 'โ๏ธ',
- data: {
- database: {
- host: 'localhost',
- port: 5432,
- credentials: {
- username: 'dbuser',
- password: 'secret123',
- },
- },
- cache: {
- enabled: true,
- ttl: 3600,
- },
- },
- },
- {
- name: 'E-commerce order',
- emoji: '๐',
- data: {
- orderId: 'ORD-2025-001',
- customer: {
- id: 456,
- name: 'Jane Doe',
- email: 'jane@example.com',
- },
- items: [
- { sku: 'PROD-A', name: 'Premium Widget', quantity: 2, price: 29.99 },
- { sku: 'PROD-B', name: 'Deluxe Gadget', quantity: 1, price: 49.99 },
- ],
- subtotal: 109.97,
- tax: 10.99,
- total: 120.96,
- status: 'pending',
- },
- },
- {
- name: 'Analytics data',
- emoji: '๐',
- data: {
- metrics: [
- { date: '2025-01-01', views: 1234, clicks: 89, conversions: 12 },
- { date: '2025-01-02', views: 2345, clicks: 156, conversions: 23 },
- { date: '2025-01-03', views: 1890, clicks: 123, conversions: 18 },
- { date: '2025-01-04', views: 3456, clicks: 234, conversions: 34 },
- { date: '2025-01-05', views: 2789, clicks: 178, conversions: 27 },
- ],
- },
- },
- {
- name: 'Large dataset (50 records)',
- emoji: '๐',
- data: {
- records: Array.from({ length: 50 }, (_, i) => ({
- id: i + 1,
- name: `User ${i + 1}`,
- email: `user${i + 1}@example.com`,
- score: (i * 7) % 100,
- active: i % 3 !== 0,
- })),
- },
- },
-] as const
-
-const DETAILED_EXAMPLE_INDICES = [2, 3, 6] // Small product catalog, API response, Analytics data
-
-// Calculate total savings
-let totalJsonTokens = 0
-let totalToonTokens = 0
-
-const results: BenchmarkResult[] = []
-
-for (const example of BENCHMARK_EXAMPLES) {
- const jsonString = JSON.stringify(example.data, null, 2)
- const toonString = encodeToon(example.data)
-
- const jsonTokens = encode(jsonString).length
- const toonTokens = encode(toonString).length
- const savings = jsonTokens - toonTokens
- const savingsPercent = ((savings / jsonTokens) * 100).toFixed(1)
-
- totalJsonTokens += jsonTokens
- totalToonTokens += toonTokens
-
- results.push({
- name: example.name,
- emoji: example.emoji,
- jsonTokens,
- toonTokens,
- savings,
- savingsPercent,
- })
-}
-
-const totalSavings = totalJsonTokens - totalToonTokens
-const totalSavingsPercent = ((totalSavings / totalJsonTokens) * 100).toFixed(1)
-
-// Generate markdown content matching README style
-const summaryRows = results
- .map(result => `| ${result.emoji} ${result.name} | ${result.jsonTokens} | ${result.toonTokens} | ${result.savings} | **${result.savingsPercent}%** |`)
- .join('\n')
-
-const detailedExamples = DETAILED_EXAMPLE_INDICES
- .map((exampleIndex, i) => {
- const example = BENCHMARK_EXAMPLES[exampleIndex]!
- const result = results[exampleIndex]!
- const separator = i < DETAILED_EXAMPLE_INDICES.length - 1 ? '\n\n---' : ''
-
- return `### ${result.emoji} ${result.name}
-
-**Savings: ${result.savings} tokens (${result.savingsPercent}% reduction)**
-
-**JSON** (${result.jsonTokens} tokens):
-
-\`\`\`json
-${JSON.stringify(example.data, null, 2)}
-\`\`\`
-
-**TOON** (${result.toonTokens} tokens):
-
-\`\`\`
-${encodeToon(example.data)}
-\`\`\`${separator}`
- })
- .join('\n\n')
-
-const markdown = `
-| Example | JSON | TOON | Tokens Saved | Reduction |
-| ------- | ---- | ---- | ------------ | --------- |
-${summaryRows}
-| **Total** | **${totalJsonTokens}** | **${totalToonTokens}** | **${totalSavings}** | **${totalSavingsPercent}%** |
-
-
-View detailed results
-
-${detailedExamples}
-
-
-`.trimStart()
-
-console.log(markdown)
-
-await fsp.mkdir(path.join(rootDir, 'docs'), { recursive: true })
-await fsp.writeFile(benchPath, markdown, 'utf-8')
-
-console.log(`โ
Benchmark written to ${benchPath}`)