mirror of https://github.com/voson-wang/toon.git (synced 2026-01-29 23:34:10 +08:00)

docs: overhaul retrieval accuracy benchmark

README.md (188 lines changed)
@@ -87,11 +87,11 @@ Total ████████████░░░░░
     "repo": "freeCodeCamp/freeCodeCamp",
     "description": "freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…",
     "createdAt": "2014-12-24T17:49:19Z",
-    "updatedAt": "2025-10-27T07:40:58Z",
-    "pushedAt": "2025-10-26T11:31:08Z",
-    "stars": 430828,
-    "watchers": 8582,
-    "forks": 42136,
+    "updatedAt": "2025-10-28T11:58:08Z",
+    "pushedAt": "2025-10-28T10:17:16Z",
+    "stars": 430886,
+    "watchers": 8583,
+    "forks": 42146,
     "defaultBranch": "main"
   },
   {
@@ -100,11 +100,11 @@ Total ████████████░░░░░
     "repo": "codecrafters-io/build-your-own-x",
     "description": "Master programming by recreating your favorite technologies from scratch.",
     "createdAt": "2018-05-09T12:03:18Z",
-    "updatedAt": "2025-10-27T07:43:25Z",
+    "updatedAt": "2025-10-28T12:37:11Z",
     "pushedAt": "2025-10-10T18:45:01Z",
-    "stars": 430102,
-    "watchers": 6322,
-    "forks": 40388,
+    "stars": 430877,
+    "watchers": 6332,
+    "forks": 40453,
     "defaultBranch": "master"
   },
   {
@@ -113,11 +113,11 @@ Total ████████████░░░░░
     "repo": "sindresorhus/awesome",
     "description": "😎 Awesome lists about all kinds of interesting topics",
     "createdAt": "2014-07-11T13:42:37Z",
-    "updatedAt": "2025-10-27T07:44:27Z",
-    "pushedAt": "2025-10-23T17:26:53Z",
-    "stars": 409760,
-    "watchers": 8016,
-    "forks": 32015,
+    "updatedAt": "2025-10-28T12:40:21Z",
+    "pushedAt": "2025-10-27T17:57:31Z",
+    "stars": 410052,
+    "watchers": 8017,
+    "forks": 32029,
     "defaultBranch": "main"
   }
 ]
@@ -128,9 +128,9 @@ Total ████████████░░░░░
 
 ```
 repositories[3]{id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch}:
-28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…","2014-12-24T17:49:19Z","2025-10-27T07:40:58Z","2025-10-26T11:31:08Z",430828,8582,42136,main
-132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-27T07:43:25Z","2025-10-10T18:45:01Z",430102,6322,40388,master
-21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-27T07:44:27Z","2025-10-23T17:26:53Z",409760,8016,32015,main
+28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…","2014-12-24T17:49:19Z","2025-10-28T11:58:08Z","2025-10-28T10:17:16Z",430886,8583,42146,main
+132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-28T12:37:11Z","2025-10-10T18:45:01Z",430877,6332,40453,master
+21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-28T12:40:21Z","2025-10-27T17:57:31Z",410052,8017,32029,main
 ```
 
 ---
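For readers skimming the hunk above: the `repositories[3]{…}` block is TOON's tabular form, produced by the library's `encode` entry point (the token-efficiency script later in this commit imports it from the repo root). A minimal sketch, assuming default encode options:

```ts
// Grounded on the benchmark's own import; from a script under
// benchmarks/scripts/, the library root is two levels up.
import { encode } from '../../src/index'

// Uniform arrays of objects collapse into a single `key[N]{fields}:` header
// plus one CSV-like row per record, which is where the token savings come from.
const toon = encode({
  repositories: [
    { id: 28457823, name: 'freeCodeCamp', stars: 430886 },
    { id: 21737465, name: 'awesome', stars: 410052 },
  ],
})
console.log(toon)
```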
@@ -208,36 +208,36 @@ metrics[5]{date,views,clicks,conversions,revenue,bounceRate}:
 > [!NOTE]
 > Measured with [`gpt-tokenizer`](https://github.com/niieani/gpt-tokenizer) using `o200k_base` encoding (used by GPT-5 and other modern models). Savings will vary across models and tokenizers.
 
-<!-- automd:file src="./benchmarks/results/accuracy/report.md" -->
+<!-- automd:file src="./benchmarks/results/retrieval-accuracy.md" -->
 
 ### Retrieval Accuracy
 
-Accuracy across **3 LLMs** on **159 data retrieval questions**:
+Accuracy across **3 LLMs** on **154 data retrieval questions**:
 
 ```
-gpt-5-nano
-toon ████████████████████ 99.4% (158/159)
-yaml ███████████████████░ 95.0% (151/159)
-csv  ██████████████████░░ 92.5% (147/159)
-json ██████████████████░░ 92.5% (147/159)
-xml  ██████████████████░░ 91.2% (145/159)
-
-claude-haiku-4-5
-toon ███████████████░░░░░ 75.5% (120/159)
-xml  ███████████████░░░░░ 75.5% (120/159)
-csv  ███████████████░░░░░ 75.5% (120/159)
-json ███████████████░░░░░ 75.5% (120/159)
-yaml ███████████████░░░░░ 74.2% (118/159)
-
 gemini-2.5-flash
-xml  ██████████████████░░ 91.8% (146/159)
-csv  █████████████████░░░ 86.2% (137/159)
-toon █████████████████░░░ 84.9% (135/159)
-json ████████████████░░░░ 81.8% (130/159)
-yaml ████████████████░░░░ 78.6% (125/159)
+xml  ██████████████████░░ 90.3% (139/154)
+csv  ██████████████████░░ 89.0% (137/154)
+toon █████████████████░░░ 87.0% (134/154)
+json ████████████████░░░░ 79.2% (122/154)
+yaml ███████████████░░░░░ 76.0% (117/154)
+
+gpt-5-nano
+toon ███████████████████░ 96.1% (148/154)
+csv  ██████████████████░░ 90.3% (139/154)
+yaml ██████████████████░░ 89.0% (137/154)
+json ██████████████████░░ 87.7% (135/154)
+xml  █████████████████░░░ 83.8% (129/154)
+
+claude-haiku-4-5-20251001
+json ██████████░░░░░░░░░░ 48.7% (75/154)
+toon ██████████░░░░░░░░░░ 48.1% (74/154)
+xml  █████████░░░░░░░░░░░ 47.4% (73/154)
+yaml █████████░░░░░░░░░░░ 47.4% (73/154)
+csv  █████████░░░░░░░░░░░ 45.5% (70/154)
 ```
 
-**Advantage:** TOON achieves **86.6% accuracy** (vs JSON's 83.2%) while using **46.3% fewer tokens**.
+**Advantage:** TOON achieves **77.1% accuracy** (vs JSON's 71.9%) while using **46.3% fewer tokens**.
 
 <details>
 <summary><strong>Performance by dataset and model</strong></summary>
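The token figures behind these charts come from `gpt-tokenizer` with the `o200k_base` encoding, as the note above states. A minimal sketch of that measurement, assuming the library's per-encoding subpath export (the benchmark's own counting is wrapped in `calculateTokenCounts` in `benchmarks/src/report.ts`):

```ts
// Assumes gpt-tokenizer's encoding-specific entry point for o200k_base,
// the tokenizer used by GPT-5; counts will vary with other tokenizers.
import { countTokens } from 'gpt-tokenizer/encoding/o200k_base'

const json = JSON.stringify({ repositories: [{ id: 28457823, stars: 430886 }] })
console.log(countTokens(json)) // number of o200k_base tokens in the payload
```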
@@ -248,73 +248,73 @@ gemini-2.5-flash
 
 | Format | Accuracy | Tokens | Correct/Total |
 | ------ | -------- | ------ | ------------- |
-| `toon` | 87.4% | 2.483 | 152/174 |
-| `csv` | 82.8% | 2.337 | 144/174 |
-| `yaml` | 83.9% | 4.969 | 146/174 |
-| `json` | 83.9% | 6.347 | 146/174 |
-| `xml` | 88.5% | 7.314 | 154/174 |
+| `csv` | 74.7% | 2,337 | 112/150 |
+| `toon` | 76.7% | 2,483 | 115/150 |
+| `yaml` | 70.7% | 4,969 | 106/150 |
+| `xml` | 77.3% | 7,314 | 116/150 |
+| `json` | 69.3% | 6,347 | 104/150 |
 
 ##### E-commerce orders with nested structures
 
 | Format | Accuracy | Tokens | Correct/Total |
 | ------ | -------- | ------ | ------------- |
-| `toon` | 90.9% | 5.967 | 120/132 |
-| `csv` | 93.9% | 6.735 | 124/132 |
-| `yaml` | 87.1% | 7.328 | 115/132 |
-| `json` | 87.9% | 9.694 | 116/132 |
-| `xml` | 93.2% | 10.992 | 123/132 |
+| `toon` | 80.0% | 5,967 | 96/120 |
+| `csv` | 75.8% | 6,735 | 91/120 |
+| `yaml` | 74.2% | 7,328 | 89/120 |
+| `json` | 79.2% | 9,694 | 95/120 |
+| `xml` | 78.3% | 10,992 | 94/120 |
 
 ##### Time-series analytics data
 
 | Format | Accuracy | Tokens | Correct/Total |
 | ------ | -------- | ------ | ------------- |
-| `csv` | 89.7% | 1.393 | 78/87 |
-| `toon` | 88.5% | 1.515 | 77/87 |
-| `yaml` | 83.9% | 2.938 | 73/87 |
-| `json` | 88.5% | 3.665 | 77/87 |
-| `xml` | 85.1% | 4.376 | 74/87 |
+| `csv` | 75.5% | 1,393 | 77/102 |
+| `toon` | 76.5% | 1,515 | 78/102 |
+| `yaml` | 74.5% | 2,938 | 76/102 |
+| `json` | 76.5% | 3,665 | 78/102 |
+| `xml` | 74.5% | 4,376 | 76/102 |
 
 ##### Top 100 GitHub repositories
 
 | Format | Accuracy | Tokens | Correct/Total |
 | ------ | -------- | ------ | ------------- |
-| `toon` | 76.2% | 8.745 | 64/84 |
-| `csv` | 69.0% | 8.513 | 58/84 |
-| `yaml` | 71.4% | 13.129 | 60/84 |
-| `json` | 69.0% | 15.145 | 58/84 |
-| `xml` | 71.4% | 17.095 | 60/84 |
+| `toon` | 74.4% | 8,745 | 67/90 |
+| `csv` | 73.3% | 8,513 | 66/90 |
+| `yaml` | 62.2% | 13,129 | 56/90 |
+| `json` | 61.1% | 15,145 | 55/90 |
+| `xml` | 61.1% | 17,095 | 55/90 |
 
 #### Performance by Model
 
-##### gpt-5-nano
-
-| Format | Accuracy | Correct/Total |
-| ------ | -------- | ------------- |
-| `toon` | 99.4% | 158/159 |
-| `yaml` | 95.0% | 151/159 |
-| `csv` | 92.5% | 147/159 |
-| `json` | 92.5% | 147/159 |
-| `xml` | 91.2% | 145/159 |
-
-##### claude-haiku-4-5
-
-| Format | Accuracy | Correct/Total |
-| ------ | -------- | ------------- |
-| `toon` | 75.5% | 120/159 |
-| `xml` | 75.5% | 120/159 |
-| `csv` | 75.5% | 120/159 |
-| `json` | 75.5% | 120/159 |
-| `yaml` | 74.2% | 118/159 |
-
 ##### gemini-2.5-flash
 
 | Format | Accuracy | Correct/Total |
 | ------ | -------- | ------------- |
-| `xml` | 91.8% | 146/159 |
-| `csv` | 86.2% | 137/159 |
-| `toon` | 84.9% | 135/159 |
-| `json` | 81.8% | 130/159 |
-| `yaml` | 78.6% | 125/159 |
+| `xml` | 90.3% | 139/154 |
+| `csv` | 89.0% | 137/154 |
+| `toon` | 87.0% | 134/154 |
+| `json` | 79.2% | 122/154 |
+| `yaml` | 76.0% | 117/154 |
 
+##### gpt-5-nano
+
+| Format | Accuracy | Correct/Total |
+| ------ | -------- | ------------- |
+| `toon` | 96.1% | 148/154 |
+| `csv` | 90.3% | 139/154 |
+| `yaml` | 89.0% | 137/154 |
+| `json` | 87.7% | 135/154 |
+| `xml` | 83.8% | 129/154 |
+
+##### claude-haiku-4-5-20251001
+
+| Format | Accuracy | Correct/Total |
+| ------ | -------- | ------------- |
+| `json` | 48.7% | 75/154 |
+| `toon` | 48.1% | 74/154 |
+| `xml` | 47.4% | 73/154 |
+| `yaml` | 47.4% | 73/154 |
+| `csv` | 45.5% | 70/154 |
+
 </details>
@@ -336,32 +336,34 @@ Four datasets designed to test different structural patterns:
 
 #### Question Types
 
-159 questions are generated dynamically across three categories:
+154 questions are generated dynamically across three categories:
 
-- **Field retrieval (50%)**: Direct value lookups
+- **Field retrieval (40%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths)
   - Example: "What is Alice's salary?" → `75000`
+  - Example: "How many items are in order ORD-0042?" → `3`
   - Example: "What is the customer name for order ORD-0042?" → `John Doe`
 
-- **Aggregation (25%)**: Counting and summation tasks
+- **Aggregation (32%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons)
   - Example: "How many employees work in Engineering?" → `17`
   - Example: "What is the total revenue across all orders?" → `45123.50`
+  - Example: "How many employees have salary > 80000?" → `23`
 
-- **Filtering (25%)**: Conditional queries
+- **Filtering (28%)**: Multi-condition queries requiring compound logic (AND constraints across fields)
   - Example: "How many employees in Sales have salary > 80000?" → `5`
-  - Example: "How many orders have total > 400?" → `12`
+  - Example: "How many active employees have more than 10 years of experience?" → `8`
 
 #### Evaluation Process
 
-1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, JSON, YAML, CSV, XML).
+1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
-4. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`).
+3. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`).
 
 #### Models & Configuration
 
-- **Models tested**: `gpt-5-nano`, `claude-haiku-4-5`, `gemini-2.5-flash`
+- **Models tested**: `gemini-2.5-flash`, `gpt-5-nano`, `claude-haiku-4-5-20251001`
 - **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding (GPT-5 tokenizer)
 - **Temperature**: 0 (for non-reasoning models)
-- **Total evaluations**: 159 questions × 5 formats × 3 models = 2,385 LLM calls
+- **Total evaluations**: 154 questions × 5 formats × 3 models = 2,310 LLM calls
 
 </details>
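Step 3's judge call is easy to picture with the AI SDK the benchmark already depends on (`ai`, `@ai-sdk/openai`). A minimal sketch with a hypothetical prompt; the repo's actual judging logic lives in `benchmarks/src/evaluate.ts`:

```ts
import { openai } from '@ai-sdk/openai'
import { generateText } from 'ai'

// Hypothetical LLM-as-judge check: does the model's answer denote the same
// value as the expected one (e.g. `50000` vs `$50,000`)?
async function isSemanticallyCorrect(expected: string, actual: string): Promise<boolean> {
  const { text } = await generateText({
    model: openai('gpt-5-nano'),
    prompt: `Expected answer: ${expected}\nModel answer: ${actual}\nDo these denote the same value? Reply YES or NO.`,
  })
  return text.trim().toUpperCase().startsWith('YES')
}
```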
File diff suppressed because it is too large
@@ -5,7 +5,7 @@
   "scripts": {
     "benchmark:token-efficiency": "tsx scripts/token-efficiency-benchmark.ts",
     "benchmark:accuracy": "tsx --env-file=.env scripts/accuracy-benchmark.ts",
-    "fetch-github-data": "tsx scripts/fetch-github-data.ts",
+    "fetch:github-repos": "tsx scripts/fetch-github-repos.ts",
     "test": "vitest"
   },
   "devDependencies": {
@@ -14,14 +14,16 @@
     "@ai-sdk/openai": "^2.0.53",
     "@ai-sdk/provider": "^2.0.0",
     "@antfu/eslint-config": "^6.1.0",
+    "@clack/prompts": "^0.11.0",
     "@faker-js/faker": "^10.1.0",
     "ai": "^5.0.80",
-    "consola": "^3.4.2",
     "csv-stringify": "^6.6.0",
     "fast-xml-parser": "^5.3.0",
     "gpt-tokenizer": "^3.2.0",
     "ofetch": "^1.4.1",
     "p-map": "^7.0.3",
+    "p-queue": "^9.0.0",
+    "unstorage": "^1.17.1",
     "yaml": "^2.8.1"
   }
 }
File diff suppressed because one or more lines are too long

benchmarks/results/accuracy/models/gemini-2.5-flash (new file, 1 line)
File diff suppressed because one or more lines are too long

benchmarks/results/accuracy/models/gpt-5-nano (new file, 1 line)
File diff suppressed because one or more lines are too long

File diff suppressed because it is too large
@@ -1,91 +0,0 @@
-{
-  "formatResults": [
-    {
-      "format": "toon",
-      "accuracy": 0.8658280922431866,
-      "totalTokens": 4678,
-      "averageLatency": 5321,
-      "correctCount": 413,
-      "totalCount": 477
-    },
-    {
-      "format": "xml",
-      "accuracy": 0.8616352201257862,
-      "totalTokens": 9944,
-      "averageLatency": 6035,
-      "correctCount": 411,
-      "totalCount": 477
-    },
-    {
-      "format": "csv",
-      "accuracy": 0.8469601677148847,
-      "totalTokens": 4745,
-      "averageLatency": 6551,
-      "correctCount": 404,
-      "totalCount": 477
-    },
-    {
-      "format": "json",
-      "accuracy": 0.8322851153039832,
-      "totalTokens": 8713,
-      "averageLatency": 7981,
-      "correctCount": 397,
-      "totalCount": 477
-    },
-    {
-      "format": "yaml",
-      "accuracy": 0.8259958071278826,
-      "totalTokens": 7091,
-      "averageLatency": 5561,
-      "correctCount": 394,
-      "totalCount": 477
-    }
-  ],
-  "questions": 159,
-  "models": [
-    "gpt-5-nano",
-    "claude-haiku-4-5",
-    "gemini-2.5-flash"
-  ],
-  "datasets": [
-    {
-      "name": "tabular",
-      "description": "Uniform employee records (TOON optimal format)"
-    },
-    {
-      "name": "nested",
-      "description": "E-commerce orders with nested structures"
-    },
-    {
-      "name": "analytics",
-      "description": "Time-series analytics data"
-    },
-    {
-      "name": "github",
-      "description": "Top 100 GitHub repositories"
-    }
-  ],
-  "tokenCounts": {
-    "json-tabular": 6347,
-    "json-nested": 9694,
-    "json-analytics": 3665,
-    "json-github": 15145,
-    "toon-tabular": 2483,
-    "toon-nested": 5967,
-    "toon-analytics": 1515,
-    "toon-github": 8745,
-    "csv-tabular": 2337,
-    "csv-nested": 6735,
-    "csv-analytics": 1393,
-    "csv-github": 8513,
-    "xml-tabular": 7314,
-    "xml-nested": 10992,
-    "xml-analytics": 4376,
-    "xml-github": 17095,
-    "yaml-tabular": 4969,
-    "yaml-nested": 7328,
-    "yaml-analytics": 2938,
-    "yaml-github": 13129
-  },
-  "timestamp": "2025-10-28T07:39:09.360Z"
-}
@@ -1,31 +1,31 @@
 ### Retrieval Accuracy
 
-Accuracy across **3 LLMs** on **159 data retrieval questions**:
+Accuracy across **3 LLMs** on **154 data retrieval questions**:
 
 ```
 gpt-5-nano
-toon ████████████████████ 99.4% (158/159)
-yaml ███████████████████░ 95.0% (151/159)
-csv  ██████████████████░░ 92.5% (147/159)
-json ██████████████████░░ 92.5% (147/159)
-xml  ██████████████████░░ 91.2% (145/159)
+toon ███████████████████░ 96.1% (148/154)
+csv  ██████████████████░░ 90.3% (139/154)
+yaml ██████████████████░░ 89.0% (137/154)
+json ██████████████████░░ 87.7% (135/154)
+xml  █████████████████░░░ 83.8% (129/154)
 
-claude-haiku-4-5
-toon ███████████████░░░░░ 75.5% (120/159)
-xml  ███████████████░░░░░ 75.5% (120/159)
-csv  ███████████████░░░░░ 75.5% (120/159)
-json ███████████████░░░░░ 75.5% (120/159)
-yaml ███████████████░░░░░ 74.2% (118/159)
-
 gemini-2.5-flash
-xml  ██████████████████░░ 91.8% (146/159)
-csv  █████████████████░░░ 86.2% (137/159)
-toon █████████████████░░░ 84.9% (135/159)
-json ████████████████░░░░ 81.8% (130/159)
-yaml ████████████████░░░░ 78.6% (125/159)
+xml  ██████████████████░░ 90.3% (139/154)
+csv  ██████████████████░░ 89.0% (137/154)
+toon █████████████████░░░ 87.0% (134/154)
+json ████████████████░░░░ 79.2% (122/154)
+yaml ███████████████░░░░░ 76.0% (117/154)
+
+claude-haiku-4-5-20251001
+json ██████████░░░░░░░░░░ 48.7% (75/154)
+toon ██████████░░░░░░░░░░ 48.1% (74/154)
+xml  █████████░░░░░░░░░░░ 47.4% (73/154)
+yaml █████████░░░░░░░░░░░ 47.4% (73/154)
+csv  █████████░░░░░░░░░░░ 45.5% (70/154)
 ```
 
-**Advantage:** TOON achieves **86.6% accuracy** (vs JSON's 83.2%) while using **46.3% fewer tokens**.
+**Advantage:** TOON achieves **77.1% accuracy** (vs JSON's 71.9%) while using **46.3% fewer tokens**.
 
 <details>
 <summary><strong>Performance by dataset and model</strong></summary>
@@ -36,41 +36,41 @@ gemini-2.5-flash
 
 | Format | Accuracy | Tokens | Correct/Total |
 | ------ | -------- | ------ | ------------- |
-| `toon` | 87.4% | 2.483 | 152/174 |
-| `csv` | 82.8% | 2.337 | 144/174 |
-| `yaml` | 83.9% | 4.969 | 146/174 |
-| `json` | 83.9% | 6.347 | 146/174 |
-| `xml` | 88.5% | 7.314 | 154/174 |
+| `csv` | 74.7% | 2,337 | 112/150 |
+| `toon` | 76.7% | 2,483 | 115/150 |
+| `yaml` | 70.7% | 4,969 | 106/150 |
+| `xml` | 77.3% | 7,314 | 116/150 |
+| `json` | 69.3% | 6,347 | 104/150 |
 
 ##### E-commerce orders with nested structures
 
 | Format | Accuracy | Tokens | Correct/Total |
 | ------ | -------- | ------ | ------------- |
-| `toon` | 90.9% | 5.967 | 120/132 |
-| `csv` | 93.9% | 6.735 | 124/132 |
-| `yaml` | 87.1% | 7.328 | 115/132 |
-| `json` | 87.9% | 9.694 | 116/132 |
-| `xml` | 93.2% | 10.992 | 123/132 |
+| `toon` | 80.0% | 5,967 | 96/120 |
+| `csv` | 75.8% | 6,735 | 91/120 |
+| `yaml` | 74.2% | 7,328 | 89/120 |
+| `json` | 79.2% | 9,694 | 95/120 |
+| `xml` | 78.3% | 10,992 | 94/120 |
 
 ##### Time-series analytics data
 
 | Format | Accuracy | Tokens | Correct/Total |
 | ------ | -------- | ------ | ------------- |
-| `csv` | 89.7% | 1.393 | 78/87 |
-| `toon` | 88.5% | 1.515 | 77/87 |
-| `yaml` | 83.9% | 2.938 | 73/87 |
-| `json` | 88.5% | 3.665 | 77/87 |
-| `xml` | 85.1% | 4.376 | 74/87 |
+| `csv` | 75.5% | 1,393 | 77/102 |
+| `toon` | 76.5% | 1,515 | 78/102 |
+| `yaml` | 74.5% | 2,938 | 76/102 |
+| `json` | 76.5% | 3,665 | 78/102 |
+| `xml` | 74.5% | 4,376 | 76/102 |
 
 ##### Top 100 GitHub repositories
 
 | Format | Accuracy | Tokens | Correct/Total |
 | ------ | -------- | ------ | ------------- |
-| `toon` | 76.2% | 8.745 | 64/84 |
-| `csv` | 69.0% | 8.513 | 58/84 |
-| `yaml` | 71.4% | 13.129 | 60/84 |
-| `json` | 69.0% | 15.145 | 58/84 |
-| `xml` | 71.4% | 17.095 | 60/84 |
+| `toon` | 74.4% | 8,745 | 67/90 |
+| `csv` | 73.3% | 8,513 | 66/90 |
+| `yaml` | 62.2% | 13,129 | 56/90 |
+| `json` | 61.1% | 15,145 | 55/90 |
+| `xml` | 61.1% | 17,095 | 55/90 |
 
 #### Performance by Model
 
@@ -78,31 +78,31 @@ gemini-2.5-flash
 
 | Format | Accuracy | Correct/Total |
 | ------ | -------- | ------------- |
-| `toon` | 99.4% | 158/159 |
-| `yaml` | 95.0% | 151/159 |
-| `csv` | 92.5% | 147/159 |
-| `json` | 92.5% | 147/159 |
-| `xml` | 91.2% | 145/159 |
+| `toon` | 96.1% | 148/154 |
+| `csv` | 90.3% | 139/154 |
+| `yaml` | 89.0% | 137/154 |
+| `json` | 87.7% | 135/154 |
+| `xml` | 83.8% | 129/154 |
 
-##### claude-haiku-4-5
-
-| Format | Accuracy | Correct/Total |
-| ------ | -------- | ------------- |
-| `toon` | 75.5% | 120/159 |
-| `xml` | 75.5% | 120/159 |
-| `csv` | 75.5% | 120/159 |
-| `json` | 75.5% | 120/159 |
-| `yaml` | 74.2% | 118/159 |
-
 ##### gemini-2.5-flash
 
 | Format | Accuracy | Correct/Total |
 | ------ | -------- | ------------- |
-| `xml` | 91.8% | 146/159 |
-| `csv` | 86.2% | 137/159 |
-| `toon` | 84.9% | 135/159 |
-| `json` | 81.8% | 130/159 |
-| `yaml` | 78.6% | 125/159 |
+| `xml` | 90.3% | 139/154 |
+| `csv` | 89.0% | 137/154 |
+| `toon` | 87.0% | 134/154 |
+| `json` | 79.2% | 122/154 |
+| `yaml` | 76.0% | 117/154 |
 
+##### claude-haiku-4-5-20251001
+
+| Format | Accuracy | Correct/Total |
+| ------ | -------- | ------------- |
+| `json` | 48.7% | 75/154 |
+| `toon` | 48.1% | 74/154 |
+| `xml` | 47.4% | 73/154 |
+| `yaml` | 47.4% | 73/154 |
+| `csv` | 45.5% | 70/154 |
+
 </details>
 
@@ -124,31 +124,33 @@ Four datasets designed to test different structural patterns:
 
 #### Question Types
 
-159 questions are generated dynamically across three categories:
+154 questions are generated dynamically across three categories:
 
-- **Field retrieval (50%)**: Direct value lookups
+- **Field retrieval (40%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths)
   - Example: "What is Alice's salary?" → `75000`
+  - Example: "How many items are in order ORD-0042?" → `3`
   - Example: "What is the customer name for order ORD-0042?" → `John Doe`
 
-- **Aggregation (25%)**: Counting and summation tasks
+- **Aggregation (32%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons)
   - Example: "How many employees work in Engineering?" → `17`
   - Example: "What is the total revenue across all orders?" → `45123.50`
+  - Example: "How many employees have salary > 80000?" → `23`
 
-- **Filtering (25%)**: Conditional queries
+- **Filtering (28%)**: Multi-condition queries requiring compound logic (AND constraints across fields)
   - Example: "How many employees in Sales have salary > 80000?" → `5`
-  - Example: "How many orders have total > 400?" → `12`
+  - Example: "How many active employees have more than 10 years of experience?" → `8`
 
 #### Evaluation Process
 
-1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, JSON, YAML, CSV, XML).
+1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
-4. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`).
+3. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`).
 
 #### Models & Configuration
 
-- **Models tested**: `gpt-5-nano`, `claude-haiku-4-5`, `gemini-2.5-flash`
+- **Models tested**: `claude-haiku-4-5-20251001`, `gemini-2.5-flash`, `gpt-5-nano`
 - **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding (GPT-5 tokenizer)
 - **Temperature**: 0 (for non-reasoning models)
-- **Total evaluations**: 159 questions × 5 formats × 3 models = 2,385 LLM calls
+- **Total evaluations**: 154 questions × 5 formats × 3 models = 2,310 LLM calls
 
 </details>
@@ -39,11 +39,11 @@ Total ████████████░░░░░
     "repo": "freeCodeCamp/freeCodeCamp",
     "description": "freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…",
     "createdAt": "2014-12-24T17:49:19Z",
-    "updatedAt": "2025-10-27T07:40:58Z",
-    "pushedAt": "2025-10-26T11:31:08Z",
-    "stars": 430828,
-    "watchers": 8582,
-    "forks": 42136,
+    "updatedAt": "2025-10-28T11:58:08Z",
+    "pushedAt": "2025-10-28T10:17:16Z",
+    "stars": 430886,
+    "watchers": 8583,
+    "forks": 42146,
     "defaultBranch": "main"
   },
   {
@@ -52,11 +52,11 @@ Total ████████████░░░░░
     "repo": "codecrafters-io/build-your-own-x",
     "description": "Master programming by recreating your favorite technologies from scratch.",
     "createdAt": "2018-05-09T12:03:18Z",
-    "updatedAt": "2025-10-27T07:43:25Z",
+    "updatedAt": "2025-10-28T12:37:11Z",
     "pushedAt": "2025-10-10T18:45:01Z",
-    "stars": 430102,
-    "watchers": 6322,
-    "forks": 40388,
+    "stars": 430877,
+    "watchers": 6332,
+    "forks": 40453,
     "defaultBranch": "master"
   },
   {
@@ -65,11 +65,11 @@ Total ████████████░░░░░
     "repo": "sindresorhus/awesome",
     "description": "😎 Awesome lists about all kinds of interesting topics",
     "createdAt": "2014-07-11T13:42:37Z",
-    "updatedAt": "2025-10-27T07:44:27Z",
-    "pushedAt": "2025-10-23T17:26:53Z",
-    "stars": 409760,
-    "watchers": 8016,
-    "forks": 32015,
+    "updatedAt": "2025-10-28T12:40:21Z",
+    "pushedAt": "2025-10-27T17:57:31Z",
+    "stars": 410052,
+    "watchers": 8017,
+    "forks": 32029,
     "defaultBranch": "main"
   }
 ]
@@ -80,9 +80,9 @@ Total ████████████░░░░░
 
 ```
 repositories[3]{id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch}:
-28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…","2014-12-24T17:49:19Z","2025-10-27T07:40:58Z","2025-10-26T11:31:08Z",430828,8582,42136,main
-132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-27T07:43:25Z","2025-10-10T18:45:01Z",430102,6322,40388,master
-21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-27T07:44:27Z","2025-10-23T17:26:53Z",409760,8016,32015,main
+28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…","2014-12-24T17:49:19Z","2025-10-28T11:58:08Z","2025-10-28T10:17:16Z",430886,8583,42146,main
+132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-28T12:37:11Z","2025-10-10T18:45:01Z",430877,6332,40453,master
+21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-28T12:40:21Z","2025-10-27T17:57:31Z",410052,8017,32029,main
 ```
 
 ---
@@ -1,51 +1,53 @@
-/**
- * LLM Retrieval Accuracy Benchmark
- *
- * Main entry point that orchestrates the full benchmark:
- * 1. Generate questions from datasets
- * 2. Format data in all formats (JSON, TOON, YAML, Markdown-kv)
- * 3. Evaluate each question with each format using LLMs
- * 4. Generate reports
- */
+import type { Question } from '../src/types'
 
-import type { EvaluationResult, Question } from '../src/types'
-import * as fsp from 'node:fs/promises'
 import * as path from 'node:path'
-import { consola } from 'consola'
-import pMap from 'p-map'
-import { BENCHMARKS_DIR, DEFAULT_CONCURRENCY, DRY_RUN, DRY_RUN_LIMITS, ROOT_DIR } from '../src/constants'
+import process from 'node:process'
+import * as prompts from '@clack/prompts'
+import PQueue from 'p-queue'
+import { DEFAULT_CONCURRENCY, DRY_RUN, DRY_RUN_LIMITS, MODEL_RPM_LIMITS, ROOT_DIR } from '../src/constants'
 import { datasets } from '../src/datasets'
 import { evaluateQuestion, models } from '../src/evaluate'
 import { formatters } from '../src/formatters'
 import { generateQuestions } from '../src/questions'
 import { calculateFormatResults, calculateTokenCounts, saveResults } from '../src/report'
+import { getAllModelResults, hasModelResults, saveModelResults } from '../src/storage'
 
-consola.start('Retrieval Accuracy Benchmark for TOON')
+prompts.intro('Retrieval Accuracy Benchmark')
 
-// Check if results already exist
-const resultsDir = path.join(BENCHMARKS_DIR, 'results', 'accuracy')
-const rawResultsPath = path.join(resultsDir, 'raw-results.json')
-const summaryPath = path.join(resultsDir, 'summary.json')
+// Prompt user to select which models to benchmark
+const modelChoices = models.map(({ modelId }) => ({
+  value: modelId,
+  label: modelId,
+}))
 
-let existingResults: EvaluationResult[] | undefined
-let existingTokenCounts: Record<string, number> | undefined
+const selectedModels = await prompts.multiselect({
+  message: 'Select models to benchmark (Space to select, Enter to confirm)',
+  options: modelChoices,
+  required: true,
+})
 
-try {
-  const [rawData, summaryData] = await Promise.all([
-    fsp.readFile(rawResultsPath, 'utf-8'),
-    fsp.readFile(summaryPath, 'utf-8'),
-  ])
-  existingResults = JSON.parse(rawData)
-  const summary = JSON.parse(summaryData)
-  existingTokenCounts = summary.tokenCounts
-  consola.info('Found existing results – regenerating report only')
+if (prompts.isCancel(selectedModels)) {
+  prompts.cancel('Benchmark cancelled')
+  process.exit(0)
 }
-catch {
-  // Results don't exist, will run full evaluation
+
+const activeModels = models.filter(m => selectedModels.includes(m.modelId))
+
+prompts.log.info(`Selected ${activeModels.length} model(s): ${activeModels.map(m => m.modelId).join(', ')}`)
+
+// Check which models already have results
+const existingModelResults: Record<string, boolean> = {}
+for (const model of activeModels) {
+  const existingResult = await hasModelResults(model.modelId)
+  if (existingResult)
+    existingModelResults[model.modelId] = existingResult
+}
+
+if (Object.keys(existingModelResults).length > 0) {
+  prompts.log.info(`Found existing results for ${Object.values(existingModelResults).length} model(s)`)
 }
 
 if (DRY_RUN) {
-  consola.info('Limiting questions and models for dry run')
+  prompts.log.info('Limiting questions and models for dry run')
 }
 
 let questions = generateQuestions()
@@ -55,79 +57,98 @@ if (DRY_RUN && DRY_RUN_LIMITS.maxQuestions) {
   questions = questions.slice(0, DRY_RUN_LIMITS.maxQuestions)
 }
 
-// Filter models for dry run
-const activeModels = DRY_RUN && DRY_RUN_LIMITS.allowedModels.length > 0
-  ? Object.fromEntries(
-    Object.entries(models).filter(([name]) => DRY_RUN_LIMITS.allowedModels.includes(name)),
-  )
-  : models
+prompts.log.info(`Evaluating ${questions.length} questions`)
+prompts.log.info(`Testing ${Object.keys(formatters).length} formats`)
 
-let results: EvaluationResult[]
-let tokenCounts: Record<string, number>
-
-if (existingResults && existingTokenCounts) {
-  // Reuse existing results
-  results = existingResults
-  tokenCounts = existingTokenCounts
-}
-else {
-  // Run full evaluation
-  consola.info(`Evaluating ${questions.length} questions`)
-  consola.info(`Testing ${Object.keys(formatters).length} formats`)
-  consola.info(`Using ${Object.keys(activeModels).length} models: ${Object.keys(activeModels).join(', ')}`)
+// Evaluate each model separately and save results incrementally
+for (const model of activeModels) {
+  const modelId = model.modelId
 
-  // Calculate token counts for all format+dataset combinations
-  tokenCounts = calculateTokenCounts(formatters)
+  // Skip if results already exist
+  if (existingModelResults[modelId]) {
+    prompts.log.info(`Skipping ${modelId} (results already exist)`)
+    continue
+  }
 
-  // Generate evaluation tasks
-  const tasks: { question: Question, formatName: string, modelName: string }[] = []
+  prompts.log.step(`Running benchmark for ${modelId}`)
+
+  // Generate evaluation tasks for this model
+  const tasks: { question: Question, formatName: string }[] = []
   for (const question of questions) {
     for (const [formatName] of Object.entries(formatters)) {
-      for (const [modelName] of Object.entries(activeModels)) {
-        tasks.push({ question, formatName, modelName })
-      }
+      tasks.push({ question, formatName })
     }
   }
 
   const total = tasks.length
-  consola.start(`Running ${total} evaluations with concurrency: ${DEFAULT_CONCURRENCY}`)
+  const rpmLimit = MODEL_RPM_LIMITS[modelId]
+  const queue = new PQueue({
+    concurrency: DEFAULT_CONCURRENCY,
+    intervalCap: rpmLimit,
+    interval: rpmLimit ? 60_000 : undefined,
+  })
 
-  results = await pMap(
-    tasks,
-    async (task, index) => {
+  const evalSpinner = prompts.spinner()
+  evalSpinner.start(`Running ${total} evaluations (concurrency: ${DEFAULT_CONCURRENCY}, RPM limit: ${rpmLimit ?? 'unlimited'})`)
+
+  let completed = 0
+
+  // Queue all tasks
+  const modelResultPromises = tasks.map(task =>
+    queue.add(async () => {
       // Format data on-demand
       const dataset = datasets.find(d => d.name === task.question.dataset)!
       const formatter = formatters[task.formatName]!
       const formattedData = formatter(dataset.data)
-      const model = activeModels[task.modelName as keyof typeof activeModels]!
 
       const result = await evaluateQuestion({
        question: task.question,
        formatName: task.formatName,
        formattedData,
        model,
-        modelName: task.modelName,
      })
 
      // Progress update after task completes
-      if ((index + 1) % 10 === 0 || (index + 1) === total) {
-        const percent = (((index + 1) / total) * 100).toFixed(1)
-        consola.start(`Progress: ${index + 1}/${total} (${percent}%)`)
+      completed++
+      if (completed % 10 === 0 || completed === total) {
+        const percent = ((completed / total) * 100).toFixed(1)
+        evalSpinner.message(`Progress: ${completed}/${total} (${percent}%)`)
      }
 
      return result
-    },
-    { concurrency: DEFAULT_CONCURRENCY },
+    }),
  )
 
-  consola.success('Evaluation complete!')
+  // Wait for all tasks to complete
+  const modelResults = await Promise.all(modelResultPromises)
+
+  evalSpinner.stop(`Evaluation complete for ${modelId}`)
+
+  // Save results immediately for this model
+  await saveModelResults(modelId, modelResults)
+  prompts.log.success(`Saved results for ${modelId}`)
 }
 
-// Generate/regenerate markdown report
-consola.start('Generating report and saving results…')
-const formatResults = calculateFormatResults(results, tokenCounts)
-await saveResults(results, formatResults, questions, tokenCounts)
+// Generate/regenerate markdown report from all available model results
+const reportSpinner = prompts.spinner()
+reportSpinner.start('Generating report from all model results')
 
-consola.info(`Results saved to: \`${path.relative(ROOT_DIR, resultsDir)}\``)
-consola.success(existingResults ? 'Markdown report regenerated!' : 'Evaluation complete!')
+// Load all available model results (including any that were skipped)
+const allModelResults = await getAllModelResults()
+const allResults = Object.values(allModelResults).flat()
+
+if (allResults.length === 0) {
+  prompts.log.warn('No results available to generate report')
+  process.exit(0)
+}
+
+// Calculate token counts freshly (deterministic, no need to persist)
+const tokenCounts = calculateTokenCounts(formatters)
+
+// Calculate format statistics and save report
+const formatResults = calculateFormatResults(allResults, tokenCounts)
+const resultsDir = await saveResults(allResults, formatResults, questions, tokenCounts)
+
+const reportPath = path.join(resultsDir, 'retrieval-accuracy.md')
+prompts.log.info(`Report saved to: \`${path.relative(ROOT_DIR, reportPath)}\``)
+reportSpinner.stop('Report generation complete!')
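The core of the rewrite above is the switch from one flat `pMap` over every model × question pair to one `PQueue` per model, so each provider's RPM quota is enforced independently. A minimal sketch of that throttling pattern, with hypothetical `tasks` as a stand-in for the benchmark's evaluation closures:

```ts
import PQueue from 'p-queue'

// At most 10 tasks in flight, and at most `rpm` task starts per rolling
// 60-second window (intervalCap / interval), mirroring the script above.
const rpm = 50 // e.g. the claude-haiku-4-5 limit from MODEL_RPM_LIMITS
const queue = new PQueue({ concurrency: 10, intervalCap: rpm, interval: 60_000 })

declare const tasks: (() => Promise<string>)[] // hypothetical workload
const results = await Promise.all(tasks.map(task => queue.add(task)))
```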
@@ -1,18 +1,20 @@
 import * as path from 'node:path'
 import process from 'node:process'
-import { consola } from 'consola'
+import * as prompts from '@clack/prompts'
 import { ofetch } from 'ofetch'
 import pMap from 'p-map'
 import { BENCHMARKS_DIR } from '../src/constants'
 import { ensureDir, saveJsonFile } from '../src/utils'
 
+prompts.intro('GitHub Repositories Fetcher')
+
 try {
   // Fetch top 100 repos from GitHub
   const repoList = await searchTop100Repos()
   const repos = await fetchRepoDetails(repoList)
 
   if (repos.length === 0) {
-    consola.error('❌ No repositories fetched. Exiting.')
+    prompts.log.error('No repositories fetched. Exiting.')
     process.exit(1)
   }
 
@@ -21,15 +23,16 @@ try {
 
   await saveRepos(repos)
 
-  consola.success('Done!')
+  prompts.log.success('Done!')
 }
 catch (error) {
-  consola.error(error)
+  prompts.log.error(String(error))
   process.exit(1)
 }
 
 async function searchTop100Repos(): Promise<string[]> {
-  consola.start('Fetching top 100 starred repositories from GitHub API…')
+  const s = prompts.spinner()
+  s.start('Fetching top 100 starred repositories')
 
   const response = await ofetch<{ items: { full_name: string }[] }>(
     'https://api.github.com/search/repositories',
@@ -47,23 +50,26 @@ async function searchTop100Repos(): Promise<string[]> {
     },
   )
 
+  s.stop('Fetched top 100 repositories')
+
   return response.items.map(item => item.full_name)
 }
 
 async function fetchRepoDetails(repoList: string[]): Promise<Record<string, any>[]> {
-  consola.start(`Fetching ${repoList.length} GitHub repositories…`)
+  const s = prompts.spinner()
+  s.start(`Fetching ${repoList.length} GitHub repositories`)
 
   const repos = await pMap(
     repoList,
     async (repoPath, index) => {
-      consola.info(`[${index + 1}/${repoList.length}] Fetching ${repoPath}…`)
+      s.message(`[${index + 1}/${repoList.length}] Fetching ${repoPath}`)
       const { repo } = await ofetch(`https://ungh.cc/repos/${repoPath}`)
       return repo
     },
     { concurrency: 5 },
   )
 
-  consola.success(`Successfully fetched ${repos.length}/${repoList.length} repositories`)
+  s.stop(`Successfully fetched ${repos.length}/${repoList.length} repositories`)
 
   return repos
 }
@@ -76,5 +82,5 @@ async function saveRepos(repos: Record<string, any>[]): Promise<void> {
   await saveJsonFile(outputFile, repos)
 
   const relativePath = path.relative(BENCHMARKS_DIR, outputFile)
-  consola.info(`Saved to \`${relativePath}\``)
+  prompts.log.info(`Result saved to \`${relativePath}\``)
 }
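The scripts above all follow the same `@clack/prompts` shape that replaces `consola`: an `intro` banner, a spinner whose label is updated in place as work progresses, and `log` helpers for one-off lines. A minimal sketch of the pattern:

```ts
import * as prompts from '@clack/prompts'

prompts.intro('Example Task')

const s = prompts.spinner()
s.start('Working')
for (let i = 1; i <= 100; i++)
  s.message(`[${i}/100] processing`) // update the spinner label in place
s.stop('Work complete')

prompts.log.success('Done!')
```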
@@ -1,6 +1,6 @@
 import * as fsp from 'node:fs/promises'
 import * as path from 'node:path'
-import { consola } from 'consola'
+import * as prompts from '@clack/prompts'
 import { encode } from '../../src/index'
 import githubRepos from '../data/github-repos.json' with { type: 'json' }
 import { BENCHMARKS_DIR, ROOT_DIR } from '../src/constants'
@@ -24,8 +24,6 @@ interface BenchmarkResult {
   showDetailed: boolean
 }
 
-const outputFilePath = path.join(BENCHMARKS_DIR, 'results', 'token-efficiency.md')
-
 const BENCHMARK_EXAMPLES = [
   {
     name: 'GitHub Repositories',
@@ -50,6 +48,8 @@ const BENCHMARK_EXAMPLES = [
   },
 ] as const
 
+prompts.intro('Token Efficiency Benchmark')
+
 // Calculate total savings
 let totalJsonTokens = 0
 let totalToonTokens = 0
@@ -204,9 +204,12 @@ ${detailedExamples}
 </details>
 `.trimStart()
 
-console.log(`${barChartSection}\n`)
+prompts.log.message(`${barChartSection}\n`)
 
-await ensureDir(path.join(BENCHMARKS_DIR, 'results'))
+const resultsDir = path.join(BENCHMARKS_DIR, 'results')
+await ensureDir(resultsDir)
+
+const outputFilePath = path.join(resultsDir, 'token-efficiency.md')
 await fsp.writeFile(outputFilePath, markdown, 'utf-8')
 
-consola.success(`Benchmark written to \`${path.relative(ROOT_DIR, outputFilePath)}\``)
+prompts.log.success(`Result saved to \`${path.relative(ROOT_DIR, outputFilePath)}\``)
@@ -5,9 +5,22 @@ export const ROOT_DIR: string = url.fileURLToPath(new URL('../../', import.meta.url))
 export const BENCHMARKS_DIR: string = url.fileURLToPath(new URL('../', import.meta.url))
 
 /**
- * Default concurrency for parallel evaluations
+ * Model-specific RPM (requests per minute) limits to handle API quotas
+ *
+ * @remarks
+ * Set `undefined` for models without specific limits
  */
-export const DEFAULT_CONCURRENCY = 20
+/// keep-sorted
+export const MODEL_RPM_LIMITS: Record<string, number | undefined> = {
+  'claude-haiku-4-5-20251001': 50,
+  'gemini-2.5-flash': 25,
+  'gpt-5-nano': undefined,
+}
+
+/**
+ * Default concurrency for parallel evaluations to prevent bursting
+ */
+export const DEFAULT_CONCURRENCY = 10
 
 /**
  * Progress bar configuration
@@ -28,13 +41,83 @@ export const PROGRESS_BAR = {
 export const DRY_RUN: boolean = process.env.DRY_RUN === 'true'
 
 /**
- * Limits applied when DRY_RUN is enabled
+ * Limits applied during dry run mode
  */
 export const DRY_RUN_LIMITS = {
   /** Maximum number of questions to evaluate */
   maxQuestions: 10,
-  /** Maximum number of formats to test */
-  maxFormats: undefined as number | undefined,
-  /** Models to use in dry run */
-  allowedModels: [] as string[],
 }
 
+/**
+ * Threshold values for filtering and aggregation questions
+ */
+export const QUESTION_THRESHOLDS = {
+  tabular: {
+    salaryRanges: [60000, 80000, 100000, 120000],
+    experienceYears: [5, 10, 15, 20],
+    departmentSalaryThreshold: 80000,
+    departmentExperienceThreshold: 10,
+  },
+  nested: {
+    highValueOrders: [200, 400, 600],
+    statusValueThreshold: 300,
+    itemCountThreshold: 3,
+    totalThresholdsForItems: [300, 500],
+  },
+  analytics: {
+    views: [5000, 7000],
+    conversions: [10, 30],
+    viewsForFiltering: [6000, 7000],
+    conversionsForFiltering: 15,
+    revenueThresholds: [500, 1000, 1500, 2000, 2500],
+    viewsThresholdForRevenue: 6000,
+    clicksForFiltering: [250, 400],
+    conversionsForClickFiltering: 15,
+    revenueForBounceRate: [1000, 1500],
+    bounceRateThreshold: 0.5,
+  },
+  github: {
+    stars: [100000, 150000, 200000],
+    forks: [20000, 35000, 50000],
+    watchers: [5000, 8000],
+    starForkCombinations: [
+      { stars: 75000, forks: 15000 },
+      { stars: 100000, forks: 20000 },
+      { stars: 150000, forks: 30000 },
+      { stars: 200000, forks: 45000 },
+    ],
+    starWatcherCombinations: [
+      { stars: 100000, watchers: 7000 },
+      { stars: 150000, watchers: 9000 },
+    ],
+  },
+} as const
+
+/**
+ * Question generation configuration
+ */
+export const QUESTION_LIMITS = {
+  tabular: {
+    fieldRetrieval: 20,
+    aggregationDepartments: 6,
+    filteringMultiConditionDepartments: 6,
+    filteringExperience: 4,
+    filteringDepartmentExp: 3,
+    filteringDepartmentActive: 3,
+  },
+  nested: {
+    fieldRetrievalOrders: 8,
+    fieldRetrievalCustomers: 10,
+    aggregationStatuses: 5,
+    filteringStatusAndValue: 5,
+    filteringStatusAndItems: 3,
+  },
+  analytics: {
+    fieldRetrievalDates: 13,
+  },
+  github: {
+    fieldRetrievalRepos: 11,
+    aggregationBranches: 2,
+    filteringStarsAndForks: 8,
+  },
+} as const
|
|||||||
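Both tables drive the question generator deterministically: each entry in a threshold list expands into exactly one question, as the loops added to questions.ts further down show. A quick sketch of that pattern, not part of the commit:

```
import { QUESTION_THRESHOLDS } from './constants'

// One aggregation question per salary threshold:
for (const threshold of QUESTION_THRESHOLDS.tabular.salaryRanges)
  console.log(`How many employees have a salary greater than ${threshold}?`)
```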
benchmarks/src/datasets.ts

@@ -1,12 +1,3 @@
-/**
- * Datasets for TOON benchmarks
- *
- * These datasets are designed to test TOON's strengths and weaknesses:
- * - Tabular: Uniform records (TOON optimal)
- * - Nested: Complex structures with nested objects
- * - Analytics: Time-series data
- */
-
 import type { Dataset } from './types'
 import { faker } from '@faker-js/faker'
 import githubRepos from '../data/github-repos.json' with { type: 'json' }
@@ -128,7 +119,7 @@ const tabularDataset: Dataset = {
   description: 'Uniform employee records (TOON optimal format)',
   data: {
     employees: Array.from({ length: 100 }, (_, i): Employee => {
-      const yearsExp = faker.number.int({ min: 1, max: 20 })
+      const yearsExp = faker.number.int({ min: 1, max: 25 })
       return {
         id: i + 1,
         name: faker.person.fullName(),
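The only data change here widens the experience range, which also shifts the distribution of generated employees. A quick note on the arithmetic (a sketch, not part of the commit):

```
// faker.number.int({ min: 1, max: 25 }) draws a uniform integer in [1, 25],
// so values 21-25 (about 20% of employees) now exceed the previous cap of 20.
```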
benchmarks/src/evaluate.ts

@@ -1,28 +1,19 @@
-/**
- * LLM evaluation logic for TOON benchmarks
- *
- * Handles:
- * - Model configuration
- * - Question evaluation with LLMs
- * - Answer validation using LLM-as-judge
- */
-
 import type { LanguageModelV2 } from '@ai-sdk/provider'
 import type { EvaluationResult, Question } from './types'
 import { anthropic } from '@ai-sdk/anthropic'
 import { google } from '@ai-sdk/google'
 import { openai } from '@ai-sdk/openai'
+import * as prompts from '@clack/prompts'
 import { generateText } from 'ai'
-import { consola } from 'consola'

 /**
  * Models used for evaluation
  */
-export const models: Record<string, LanguageModelV2> = {
-  'gpt-5-nano': openai('gpt-5-nano'),
-  'claude-haiku-4-5': anthropic('claude-haiku-4-5-20251001'),
-  'gemini-2.5-flash': google('gemini-2.5-flash'),
-}
+export const models: LanguageModelV2[] = [
+  openai('gpt-5-nano'),
+  google('gemini-2.5-flash'),
+  anthropic('claude-haiku-4-5-20251001'),
+]

 /**
  * Evaluate a single question with a specific format and model
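With `models` now a plain array, callers iterate it directly and read the identifier from `model.modelId` instead of a record key. A minimal usage sketch, assuming `question` and `formattedData` are already in scope:

```
import { evaluateQuestion, models } from './evaluate'

for (const model of models) {
  const result = await evaluateQuestion({
    question,
    formatName: 'toon',
    formattedData,
    model,
  })
  console.log(model.modelId, result.isCorrect ? 'correct' : 'incorrect')
}
```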
@@ -33,14 +24,12 @@ export async function evaluateQuestion(
   formatName,
   formattedData,
   model,
-  modelName,
 }:
 {
   question: Question
   formatName: string
   formattedData: string
   model: LanguageModelV2
-  modelName: string
 },
 ): Promise<EvaluationResult> {
   const prompt = `
@@ -59,10 +48,11 @@ Provide only the direct answer, without any additional explanation or formatting
   const { text, usage } = await generateText({
     model,
     prompt,
-    temperature: !model.modelId.startsWith('gpt-') ? 0 : undefined,
+    temperature: !model.modelId.startsWith('gpt-5') ? 0 : undefined,
   })

   const latencyMs = performance.now() - startTime

   const isCorrect = await validateAnswer({
     actual: text.trim(),
     expected: question.groundTruth,
@@ -72,7 +62,7 @@ Provide only the direct answer, without any additional explanation or formatting
   return {
     questionId: question.id,
     format: formatName,
-    model: modelName,
+    model: model.modelId,
     expected: question.groundTruth,
     actual: text.trim(),
     isCorrect,
@@ -115,14 +105,14 @@ Respond with only "YES" or "NO".

   try {
     const { text } = await generateText({
-      model: models['gpt-5-nano']!,
+      model: models.find(m => m.modelId === 'gpt-5-nano')!,
       prompt,
     })

     return text.trim().toUpperCase() === 'YES'
   }
   catch (error) {
-    consola.error('Validation error:', error)
+    prompts.log.error(`Validation error: ${error}`)
     // Fallback to simple string comparison
     return actual.toLowerCase().trim() === expected.toLowerCase().trim()
   }
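Note the failure path in the last hunk: when the judge call throws, validation degrades to an exact match after trimming and lowercasing, so case differences still pass but formatting differences no longer do. An illustration of that difference:

```
// Accepted by the fallback (case-insensitive exact match):
'Engineering'.toLowerCase().trim() === 'engineering'.toLowerCase().trim() // true
// Rejected by the fallback; only the LLM judge treats these as equivalent:
'$50,000'.toLowerCase().trim() === '50000'.toLowerCase().trim() // false
```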
benchmarks/src/formatters.ts

@@ -1,20 +1,3 @@
-/**
- * Format converters for TOON benchmarks
- *
- * Converts data to different formats for comparison:
- * - JSON
- * - TOON
- * - CSV
- * - XML
- * - YAML
- *
- * ## Semantic Equivalence
- *
- * All formatters attempt to preserve semantic equivalence with the source data,
- * meaning the converted data should represent the same information. However,
- * CSV has inherent limitations with nested structures (see `toCSV` docs).
- */
-
 import { stringify as stringifyCSV } from 'csv-stringify/sync'
 import { XMLBuilder } from 'fast-xml-parser'
 import { stringify as stringifyYAML } from 'yaml'
@@ -23,7 +6,10 @@ import { encode as encodeToon } from '../../src/index'
 /**
  * Format converters registry
  *
- * Each formatter takes unknown data and returns a string representation
+ * @remarks
+ * All formatters attempt to preserve semantic equivalence with the source data,
+ * meaning the converted data should represent the same information. However,
+ * CSV has inherent limitations with nested structures (see `toCSV` docs).
  */
 export const formatters: Record<string, (data: unknown) => string> = {
   json: data => JSON.stringify(data, undefined, 2),
@@ -37,7 +23,9 @@ export const formatters: Record<string, (data: unknown) => string> = {
  * Convert data to CSV format
  *
  * @remarks
- * **Limitations**: CSV is designed for flat tabular data only. This formatter:
+ * Limitations: CSV is designed for flat tabular data only.
+ *
+ * This formatter:
  * - Only handles top-level objects with arrays of flat objects
  * - Cannot properly represent deeply nested structures (nested arrays/objects within rows)
  * - Loses nested structure information during conversion
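Because every converter shares the `(data: unknown) => string` shape, rendering one payload in all formats for a side-by-side size comparison is a one-liner per entry. A minimal sketch (the sample payload is invented):

```
import { formatters } from './formatters'

const sample = { users: [{ id: 1, name: 'Ada' }, { id: 2, name: 'Grace' }] }

for (const [name, toFormat] of Object.entries(formatters))
  console.log(name, toFormat(sample).length, 'chars')
```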
benchmarks/src/questions.ts

@@ -1,24 +1,18 @@
 /**
  * Question generation for TOON benchmarks
  *
- * Generates ~160 questions across different types:
- * - Field retrieval (50%): "What is X's Y?"
- * - Aggregation (25%): "How many X have Y?"
- * - Filtering (25%): "List/count X where Y"
- *
- * Questions are generated dynamically based on actual data values
- *
- * TODO: Balance question distribution across datasets to ensure fair representation.
- * Current distribution:
- * - Tabular: 70 questions (43%)
- * - Nested: 50 questions (31%)
- * - Analytics: 40 questions (25%)
- * - GitHub: 40 questions (25%)
+ * Generates ~150-160 questions across different question types and datasets:
+ * - Field Retrieval: Direct field access with no computation
+ *   Examples: "What is X's salary?", "What is the status of order Y?"
+ * - Aggregation: Counts, sums, averages, min/max operations (including single-condition filters)
+ *   Examples: "How many X?", "What is the total/average?", "How many X > threshold?"
+ * - Filtering: Multi-condition queries requiring complex logical operations
+ *   Examples: "How many X WHERE condition1 AND condition2?"
  */

 import type { AnalyticsMetric, Employee, Order, Repository } from './datasets'
 import type { Question } from './types'
-import { consola } from 'consola'
+import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from './constants'
 import { datasets } from './datasets'

 /**
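Every generator in this file pushes records of the same shape; the fields are visible throughout the hunks below. An illustrative instance (values invented):

```
import type { Question } from './types'

const example: Question = {
  id: 'q1',
  prompt: 'What is the salary of Alice Smith?',
  groundTruth: '75000',
  type: 'field-retrieval',
  dataset: 'tabular',
}
```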
@@ -34,19 +28,15 @@ export function generateQuestions(): Question[] {
   const analytics = (datasets.find(d => d.name === 'analytics')?.data.metrics as AnalyticsMetric[]) ?? []
   const github = (datasets.find(d => d.name === 'github')?.data.repositories as Repository[]) ?? []

-  // ========================================
-  // TABULAR DATASET QUESTIONS (70 questions)
-  // ========================================
-
   if (tabular.length > 0) {
-    // Field retrieval: specific employees (40 questions)
-    for (let i = 0; i < Math.min(40, tabular.length); i++) {
+    // Field retrieval: specific employees
+    for (let i = 0; i < Math.min(QUESTION_LIMITS.tabular.fieldRetrieval, tabular.length); i++) {
       const emp = tabular[i * 2] || tabular[i]
       if (!emp)
         continue

-      // Alternate between different field types
-      if (i % 3 === 0) {
+      // Rotate through all field types
+      if (i % 5 === 0) {
         questions.push({
           id: `q${idCounter++}`,
           prompt: `What is the salary of ${emp.name}?`,
@@ -55,7 +45,7 @@ export function generateQuestions(): Question[] {
           dataset: 'tabular',
         })
       }
-      else if (i % 3 === 1) {
+      else if (i % 5 === 1) {
         questions.push({
           id: `q${idCounter++}`,
           prompt: `What department does ${emp.name} work in?`,
@@ -64,7 +54,7 @@ export function generateQuestions(): Question[] {
           dataset: 'tabular',
         })
       }
-      else {
+      else if (i % 5 === 2) {
         questions.push({
           id: `q${idCounter++}`,
           prompt: `What is the email address of ${emp.name}?`,
@@ -73,11 +63,29 @@ export function generateQuestions(): Question[] {
           dataset: 'tabular',
         })
       }
+      else if (i % 5 === 3) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `How many years of experience does ${emp.name} have?`,
+          groundTruth: String(emp.yearsExperience),
+          type: 'field-retrieval',
+          dataset: 'tabular',
+        })
+      }
+      else {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `Is ${emp.name} an active employee?`,
+          groundTruth: emp.active ? 'yes' : 'no',
+          type: 'field-retrieval',
+          dataset: 'tabular',
+        })
+      }
     }

     // Aggregation: count by department
     const departments = [...new Set(tabular.map(e => e.department))]
-    for (const dept of departments.slice(0, 6)) {
+    for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.aggregationDepartments)) {
       const count = tabular.filter(e => e.department === dept).length
       questions.push({
         id: `q${idCounter++}`,
@@ -88,9 +96,8 @@ export function generateQuestions(): Question[] {
       })
     }

-    // Aggregation: salary ranges (4 questions)
-    const salaryThresholds = [60000, 80000, 100000, 120000]
-    for (const threshold of salaryThresholds) {
+    // Aggregation: salary ranges (single-condition filters)
+    for (const threshold of QUESTION_THRESHOLDS.tabular.salaryRanges) {
       const count = tabular.filter(e => e.salary > threshold).length
       questions.push({
         id: `q${idCounter++}`,
@@ -101,39 +108,57 @@ export function generateQuestions(): Question[] {
       })
     }

-    // Filtering: active status
+    // Aggregation: totals and averages
+    const totalEmployees = tabular.length
+    const avgSalary = Math.round(tabular.reduce((sum, e) => sum + e.salary, 0) / totalEmployees)
     const activeCount = tabular.filter(e => e.active).length
     const inactiveCount = tabular.filter(e => !e.active).length

     questions.push(
+      {
+        id: `q${idCounter++}`,
+        prompt: 'How many employees are in the dataset?',
+        groundTruth: String(totalEmployees),
+        type: 'aggregation',
+        dataset: 'tabular',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the average salary across all employees?',
+        groundTruth: String(avgSalary),
+        type: 'aggregation',
+        dataset: 'tabular',
+      },
       {
         id: `q${idCounter++}`,
         prompt: 'How many employees are active?',
         groundTruth: String(activeCount),
-        type: 'filtering',
+        type: 'aggregation',
         dataset: 'tabular',
       },
       {
         id: `q${idCounter++}`,
         prompt: 'How many employees are inactive?',
         groundTruth: String(inactiveCount),
-        type: 'filtering',
+        type: 'aggregation',
         dataset: 'tabular',
       },
     )

-    // Complex filtering: multi-condition (8 questions)
-    for (const dept of departments.slice(0, 4)) {
-      const count = tabular.filter(e => e.department === dept && e.salary > 80000).length
+    // Filtering: count by department with salary filter (multi-condition)
+    for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringMultiConditionDepartments)) {
+      const count = tabular.filter(e => e.department === dept && e.salary > QUESTION_THRESHOLDS.tabular.departmentSalaryThreshold).length
       questions.push({
         id: `q${idCounter++}`,
-        prompt: `How many employees in ${dept} have a salary greater than 80000?`,
+        prompt: `How many employees in ${dept} have a salary greater than ${QUESTION_THRESHOLDS.tabular.departmentSalaryThreshold}?`,
         groundTruth: String(count),
         type: 'filtering',
         dataset: 'tabular',
       })
     }

-    for (const exp of [5, 10]) {
+    // Filtering: active employees by experience (multi-condition)
+    for (const exp of QUESTION_THRESHOLDS.tabular.experienceYears.slice(0, QUESTION_LIMITS.tabular.filteringExperience)) {
       const count = tabular.filter(e => e.yearsExperience > exp && e.active).length
       questions.push({
         id: `q${idCounter++}`,
@@ -143,15 +168,35 @@ export function generateQuestions(): Question[] {
         dataset: 'tabular',
       })
     }

+    // Filtering: department by experience (multi-condition)
+    for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringDepartmentExp)) {
+      const count = tabular.filter(e => e.department === dept && e.yearsExperience > QUESTION_THRESHOLDS.tabular.departmentExperienceThreshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many employees in ${dept} have more than ${QUESTION_THRESHOLDS.tabular.departmentExperienceThreshold} years of experience?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'tabular',
+      })
     }

-    // ========================================
-    // NESTED DATASET QUESTIONS (50 questions)
-    // ========================================
+    // Filtering: department by active status (multi-condition)
+    for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringDepartmentActive)) {
+      const count = tabular.filter(e => e.department === dept && e.active).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many active employees work in ${dept}?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'tabular',
+      })
+    }
+  }

   if (nested.length > 0) {
-    // Field retrieval: order totals (20 questions)
-    for (let i = 0; i < Math.min(20, nested.length); i++) {
+    // Field retrieval: order totals and statuses
+    for (let i = 0; i < Math.min(QUESTION_LIMITS.nested.fieldRetrievalOrders, nested.length); i++) {
       const order = nested[i * 2] || nested[i]
       if (!order)
         continue
@@ -159,7 +204,7 @@ export function generateQuestions(): Question[] {
       if (i % 2 === 0) {
         questions.push({
           id: `q${idCounter++}`,
-          prompt: `What is the total amount for order ${order.orderId}?`,
+          prompt: `What is the total for order ${order.orderId}?`,
           groundTruth: String(order.total),
           type: 'field-retrieval',
           dataset: 'nested',
@@ -176,12 +221,13 @@ export function generateQuestions(): Question[] {
       }
     }

-    // Field retrieval: customer info (15 questions)
-    for (let i = 0; i < Math.min(15, nested.length); i++) {
-      const order = nested[i * 3] || nested[i]
+    // Field retrieval: customer info and order dates (expanded)
+    for (let i = 0; i < Math.min(QUESTION_LIMITS.nested.fieldRetrievalCustomers, nested.length); i++) {
+      const order = nested[i * 2 + 1] || nested[i]
       if (!order)
         continue

+      if (i % 4 === 0) {
         questions.push({
           id: `q${idCounter++}`,
           prompt: `What is the customer name for order ${order.orderId}?`,
@@ -190,56 +236,143 @@ export function generateQuestions(): Question[] {
           dataset: 'nested',
         })
       }
+      else if (i % 4 === 1) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What is the customer email for order ${order.orderId}?`,
+          groundTruth: order.customer.email,
+          type: 'field-retrieval',
+          dataset: 'nested',
+        })
+      }
+      else if (i % 4 === 2) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What is the order date for order ${order.orderId}?`,
+          groundTruth: order.orderDate || '',
+          type: 'field-retrieval',
+          dataset: 'nested',
+        })
+      }
+      else {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `How many items are in order ${order.orderId}?`,
+          groundTruth: String(order.items.length),
+          type: 'field-retrieval',
+          dataset: 'nested',
+        })
+      }
+    }

-    // Aggregation: count by status
+    // Aggregation: totals and averages
+    const totalRevenue = nested.reduce((sum, o) => sum + o.total, 0)
+    const avgOrderValue = totalRevenue / nested.length
+    const totalOrders = nested.length
+    const maxOrderValue = Math.max(...nested.map(o => o.total))
+
+    // Count by status
     const statuses = [...new Set(nested.map(o => o.status))]
-    for (const status of statuses) {
+    for (const status of statuses.slice(0, QUESTION_LIMITS.nested.aggregationStatuses)) {
       const count = nested.filter(o => o.status === status).length
       questions.push({
         id: `q${idCounter++}`,
         prompt: `How many orders have status "${status}"?`,
         groundTruth: String(count),
-        type: 'filtering',
+        type: 'aggregation',
         dataset: 'nested',
       })
     }

-    // Aggregation: total revenue
-    const totalRevenue = nested.reduce((sum, o) => sum + o.total, 0)
-    questions.push({
+    questions.push(
+      {
         id: `q${idCounter++}`,
         prompt: 'What is the total revenue across all orders?',
         groundTruth: String(totalRevenue.toFixed(2)),
         type: 'aggregation',
         dataset: 'nested',
-    })
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the average order value?',
+        groundTruth: String(avgOrderValue.toFixed(2)),
+        type: 'aggregation',
+        dataset: 'nested',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'How many orders are in the dataset?',
+        groundTruth: String(totalOrders),
+        type: 'aggregation',
+        dataset: 'nested',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the highest order total?',
+        groundTruth: String(maxOrderValue.toFixed(2)),
+        type: 'aggregation',
+        dataset: 'nested',
+      },
+    )

-    // Filtering: high-value orders (3 questions)
-    const highValueThresholds = [200, 400, 600]
-    for (const threshold of highValueThresholds) {
+    // Aggregation: high-value orders (single-condition filter)
+    for (const threshold of QUESTION_THRESHOLDS.nested.highValueOrders) {
       const count = nested.filter(o => o.total > threshold).length
       questions.push({
         id: `q${idCounter++}`,
         prompt: `How many orders have a total greater than ${threshold}?`,
         groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'nested',
+      })
+    }
+
+    // Filtering: multi-condition queries (status AND value)
+    const orderStatuses = [...new Set(nested.map(o => o.status))]
+    for (const status of orderStatuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndValue)) {
+      const count = nested.filter(o => o.status === status && o.total > QUESTION_THRESHOLDS.nested.statusValueThreshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many orders have status "${status}" and total greater than ${QUESTION_THRESHOLDS.nested.statusValueThreshold}?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'nested',
+      })
+    }
+
+    // Filtering: status AND items count (multi-condition)
+    for (const status of orderStatuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndItems)) {
+      const count = nested.filter(o => o.status === status && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many orders have status "${status}" and at least ${QUESTION_THRESHOLDS.nested.itemCountThreshold} items?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'nested',
+      })
+    }
+
+    // Filtering: total AND items count (multi-condition)
+    for (const threshold of QUESTION_THRESHOLDS.nested.totalThresholdsForItems) {
+      const count = nested.filter(o => o.total > threshold && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many orders have a total greater than ${threshold} and at least ${QUESTION_THRESHOLDS.nested.itemCountThreshold} items?`,
+        groundTruth: String(count),
         type: 'filtering',
         dataset: 'nested',
       })
     }
   }

-  // ========================================
-  // ANALYTICS DATASET QUESTIONS (40 questions)
-  // ========================================
-
   if (analytics.length > 0) {
-    // Field retrieval: specific dates (20 questions)
-    for (let i = 0; i < Math.min(20, analytics.length); i++) {
+    // Field retrieval: specific dates (expanded with all metrics)
+    for (let i = 0; i < Math.min(QUESTION_LIMITS.analytics.fieldRetrievalDates, analytics.length); i++) {
       const metric = analytics[i * 3] || analytics[i]
       if (!metric)
         continue

-      if (i % 2 === 0) {
+      if (i % 5 === 0) {
         questions.push({
           id: `q${idCounter++}`,
           prompt: `How many views were recorded on ${metric.date}?`,
@@ -248,7 +381,7 @@ export function generateQuestions(): Question[] {
           dataset: 'analytics',
         })
       }
-      else {
+      else if (i % 5 === 1) {
         questions.push({
           id: `q${idCounter++}`,
           prompt: `What was the revenue on ${metric.date}?`,
@@ -257,12 +390,42 @@ export function generateQuestions(): Question[] {
           dataset: 'analytics',
         })
       }
+      else if (i % 5 === 2) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What was the conversion count on ${metric.date}?`,
+          groundTruth: String(metric.conversions),
+          type: 'field-retrieval',
+          dataset: 'analytics',
+        })
+      }
+      else if (i % 5 === 3) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `How many clicks were recorded on ${metric.date}?`,
+          groundTruth: String(metric.clicks),
+          type: 'field-retrieval',
+          dataset: 'analytics',
+        })
+      }
+      else {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What was the bounce rate on ${metric.date}?`,
+          groundTruth: String(metric.bounceRate),
+          type: 'field-retrieval',
+          dataset: 'analytics',
+        })
+      }
     }

-    // Aggregation: totals (4 questions)
+    // Aggregation: totals and averages
     const totalViews = analytics.reduce((sum, m) => sum + m.views, 0)
     const totalRevenue = analytics.reduce((sum, m) => sum + m.revenue, 0)
     const totalConversions = analytics.reduce((sum, m) => sum + m.conversions, 0)
+    const avgViews = Math.round(totalViews / analytics.length)
+    const avgRevenue = totalRevenue / analytics.length
+    const avgConversions = Math.round(totalConversions / analytics.length)
+
     questions.push(
       {
@@ -286,27 +449,97 @@ export function generateQuestions(): Question[] {
         type: 'aggregation',
         dataset: 'analytics',
       },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the average number of views per day?',
+        groundTruth: String(avgViews),
+        type: 'aggregation',
+        dataset: 'analytics',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the average revenue per day?',
+        groundTruth: String(avgRevenue.toFixed(2)),
+        type: 'aggregation',
+        dataset: 'analytics',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the average number of conversions per day?',
+        groundTruth: String(avgConversions),
+        type: 'aggregation',
+        dataset: 'analytics',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'How many days are included in the analytics data?',
+        groundTruth: String(analytics.length),
+        type: 'aggregation',
+        dataset: 'analytics',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the highest number of views recorded in a single day?',
+        groundTruth: String(Math.max(...analytics.map(m => m.views))),
+        type: 'aggregation',
+        dataset: 'analytics',
+      },
     )

-    // Filtering: high-performing days (10 questions)
-    const viewThresholds = [5000, 6000, 7000]
-    for (const threshold of viewThresholds) {
+    // Aggregation: high-performing days (single-condition filters)
+    for (const threshold of QUESTION_THRESHOLDS.analytics.views) {
       const count = analytics.filter(m => m.views > threshold).length
       questions.push({
         id: `q${idCounter++}`,
         prompt: `How many days had more than ${threshold} views?`,
         groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'analytics',
+      })
+    }
+
+    // Filtering: multi-condition queries (views AND conversions)
+    for (const viewThreshold of QUESTION_THRESHOLDS.analytics.viewsForFiltering) {
+      const count = analytics.filter(m => m.views > viewThreshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForFiltering).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many days had more than ${viewThreshold} views and more than ${QUESTION_THRESHOLDS.analytics.conversionsForFiltering} conversions?`,
+        groundTruth: String(count),
         type: 'filtering',
         dataset: 'analytics',
       })
     }

-    const conversionThresholds = [10, 20, 30]
-    for (const threshold of conversionThresholds) {
-      const count = analytics.filter(m => m.conversions > threshold).length
+    // Filtering: views AND revenue (expanded)
+    for (const revenueThreshold of QUESTION_THRESHOLDS.analytics.revenueThresholds.slice(0, 5)) {
+      const count = analytics.filter(m => m.views > QUESTION_THRESHOLDS.analytics.viewsThresholdForRevenue && m.revenue > revenueThreshold).length
       questions.push({
         id: `q${idCounter++}`,
-        prompt: `How many days had more than ${threshold} conversions?`,
+        prompt: `How many days had more than ${QUESTION_THRESHOLDS.analytics.viewsThresholdForRevenue} views and revenue greater than ${revenueThreshold}?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'analytics',
+      })
+    }
+
+    // Filtering: clicks AND conversions (multi-condition)
+    for (const clickThreshold of QUESTION_THRESHOLDS.analytics.clicksForFiltering) {
+      const count = analytics.filter(m => m.clicks > clickThreshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForClickFiltering).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many days had more than ${clickThreshold} clicks and more than ${QUESTION_THRESHOLDS.analytics.conversionsForClickFiltering} conversions?`,
+        groundTruth: String(count),
+        type: 'filtering',
+        dataset: 'analytics',
+      })
+    }
+
+    // Filtering: revenue AND bounce rate (multi-condition)
+    for (const revenueThreshold of QUESTION_THRESHOLDS.analytics.revenueForBounceRate) {
+      const count = analytics.filter(m => m.revenue > revenueThreshold && m.bounceRate < QUESTION_THRESHOLDS.analytics.bounceRateThreshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many days had revenue greater than ${revenueThreshold} and bounce rate less than ${QUESTION_THRESHOLDS.analytics.bounceRateThreshold}?`,
         groundTruth: String(count),
         type: 'filtering',
         dataset: 'analytics',
@@ -314,79 +547,159 @@ export function generateQuestions(): Question[] {
       })
     }
   }

-  // ========================================
-  // GITHUB DATASET QUESTIONS (40 questions)
-  // ========================================
-
   if (github.length > 0) {
-    // Field retrieval: specific repos (20 questions)
-    for (let i = 0; i < Math.min(20, github.length); i++) {
-      const repo = github[i * 10] || github[i]
+    // Helper to extract owner from repo field
+    const getOwner = (repoFullName: string) => repoFullName.split('/')[0]!
+
+    // Field retrieval: specific repos (diverse fields)
+    for (let i = 0; i < Math.min(QUESTION_LIMITS.github.fieldRetrievalRepos, github.length); i++) {
+      const repo = github[i * 7]
       if (!repo)
         continue

-      if (i % 2 === 0) {
+      if (i % 5 === 0) {
         questions.push({
           id: `q${idCounter++}`,
-          prompt: `How many stars does ${repo.owner}/${repo.name} have?`,
+          prompt: `How many stars does ${repo.repo} have?`,
           groundTruth: String(repo.stars),
           type: 'field-retrieval',
           dataset: 'github',
         })
       }
+      else if (i % 5 === 1) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `How many forks does ${repo.repo} have?`,
+          groundTruth: String(repo.forks),
+          type: 'field-retrieval',
+          dataset: 'github',
+        })
+      }
+      else if (i % 5 === 2) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `Who is the owner of ${repo.repo}?`,
+          groundTruth: getOwner(repo.repo),
+          type: 'field-retrieval',
+          dataset: 'github',
+        })
+      }
+      else if (i % 5 === 3) {
+        questions.push({
+          id: `q${idCounter++}`,
+          prompt: `What is the default branch of ${repo.repo}?`,
+          groundTruth: repo.defaultBranch,
+          type: 'field-retrieval',
+          dataset: 'github',
+        })
+      }
       else {
         questions.push({
           id: `q${idCounter++}`,
-          prompt: `How many forks does ${repo.owner}/${repo.name} have?`,
-          groundTruth: String(repo.forks),
+          prompt: `How many watchers does ${repo.repo} have?`,
+          groundTruth: String(repo.watchers),
           type: 'field-retrieval',
           dataset: 'github',
         })
       }
     }

-    // Aggregation: count by owner (5 questions)
-    const owners = [...new Set(github.map(r => r.owner))]
-    for (const owner of owners.slice(0, 5)) {
-      const count = github.filter(r => r.owner === owner).length
-      questions.push({
-        id: `q${idCounter++}`,
-        prompt: `How many repositories does ${owner} have in the dataset?`,
-        groundTruth: String(count),
-        type: 'aggregation',
-        dataset: 'github',
-      })
-    }
-
-    // Aggregation: total stars
+    // Aggregation: popular repositories
     const totalStars = github.reduce((sum, r) => sum + r.stars, 0)
-    questions.push({
+    const totalRepos = github.length
+    const avgStars = Math.round(totalStars / totalRepos)
+
+    questions.push(
+      {
         id: `q${idCounter++}`,
         prompt: 'What is the total number of stars across all repositories?',
         groundTruth: String(totalStars),
         type: 'aggregation',
         dataset: 'github',
-    })
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'How many repositories are in the dataset?',
+        groundTruth: String(totalRepos),
+        type: 'aggregation',
+        dataset: 'github',
+      },
+      {
+        id: `q${idCounter++}`,
+        prompt: 'What is the average number of stars per repository?',
+        groundTruth: String(avgStars),
+        type: 'aggregation',
+        dataset: 'github',
+      },
+    )

-    // Filtering: popular repos (8 questions)
-    const starThresholds = [10000, 50000, 100000]
-    for (const threshold of starThresholds) {
+    // Aggregation: star thresholds (single-condition filters)
+    for (const threshold of QUESTION_THRESHOLDS.github.stars) {
       const count = github.filter(r => r.stars > threshold).length
       questions.push({
         id: `q${idCounter++}`,
         prompt: `How many repositories have more than ${threshold} stars?`,
         groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'github',
+      })
+    }
+
+    // Aggregation: fork thresholds (single-condition filters)
+    for (const threshold of QUESTION_THRESHOLDS.github.forks) {
+      const count = github.filter(r => r.forks > threshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many repositories have more than ${threshold} forks?`,
+        groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'github',
+      })
+    }
+
+    // Aggregation: watcher thresholds (single-condition filters)
+    for (const threshold of QUESTION_THRESHOLDS.github.watchers) {
+      const count = github.filter(r => r.watchers > threshold).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many repositories have more than ${threshold} watchers?`,
+        groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'github',
+      })
+    }
+
+    // Aggregation: default branch counts
+    const branches = [...new Set(github.map(r => r.defaultBranch))]
+    for (const branch of branches.slice(0, QUESTION_LIMITS.github.aggregationBranches)) {
+      const count = github.filter(r => r.defaultBranch === branch).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many repositories use "${branch}" as their default branch?`,
+        groundTruth: String(count),
+        type: 'aggregation',
+        dataset: 'github',
+      })
+    }
+
+    // Filtering: multi-condition queries (stars AND forks)
+    for (const combo of QUESTION_THRESHOLDS.github.starForkCombinations.slice(0, QUESTION_LIMITS.github.filteringStarsAndForks)) {
+      const count = github.filter(r => r.stars > combo.stars && r.forks > combo.forks).length
+      questions.push({
+        id: `q${idCounter++}`,
+        prompt: `How many repositories have more than ${combo.stars} stars and more than ${combo.forks} forks?`,
+        groundTruth: String(count),
         type: 'filtering',
         dataset: 'github',
       })
     }

-    const forkThresholds = [1000, 5000, 10000]
-    for (const threshold of forkThresholds) {
-      const count = github.filter(r => r.forks > threshold).length
+    // Filtering: stars AND watchers (multi-condition)
+    for (const combo of QUESTION_THRESHOLDS.github.starWatcherCombinations) {
+      const count = github.filter(r => r.stars > combo.stars && r.watchers > combo.watchers).length
       questions.push({
         id: `q${idCounter++}`,
-        prompt: `How many repositories have more than ${threshold} forks?`,
+        prompt: `How many repositories have more than ${combo.stars} stars and more than ${combo.watchers} watchers?`,
         groundTruth: String(count),
         type: 'filtering',
         dataset: 'github',
@@ -394,14 +707,5 @@ export function generateQuestions(): Question[] {
       })
     }
   }

-  consola.info(`Question breakdown:`)
-  consola.box(`
-    Tabular: ${questions.filter(q => q.dataset === 'tabular').length}
-    Nested: ${questions.filter(q => q.dataset === 'nested').length}
-    Analytics: ${questions.filter(q => q.dataset === 'analytics').length}
-    GitHub: ${questions.filter(q => q.dataset === 'github').length}
-    Total: ${questions.length}
-  `.trim())
-
   return questions
 }
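The per-dataset breakdown logging was dropped from this function; the same summary can be recomputed from the return value when needed. A sketch mirroring the removed `consola.box` output:

```
import { generateQuestions } from './questions'

const questions = generateQuestions()
for (const dataset of ['tabular', 'nested', 'analytics', 'github'])
  console.log(dataset, questions.filter(q => q.dataset === dataset).length)
console.log('total', questions.length)
```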
@@ -1,21 +1,9 @@
|
|||||||
/**
|
|
||||||
* Report generation for TOON benchmarks
|
|
||||||
*
|
|
||||||
* Handles:
|
|
||||||
* - Statistical analysis
|
|
||||||
* - Markdown report generation with visual elements
|
|
||||||
* - Per-dataset breakdowns
|
|
||||||
* - Cost analysis
|
|
||||||
* - Result file saving
|
|
||||||
*/
|
|
||||||
|
|
||||||
import type { EvaluationResult, FormatResult, Question } from './types'
|
import type { EvaluationResult, FormatResult, Question } from './types'
|
||||||
import * as fsp from 'node:fs/promises'
|
import * as fsp from 'node:fs/promises'
|
||||||
import * as path from 'node:path'
|
import * as path from 'node:path'
|
||||||
import { BENCHMARKS_DIR } from './constants'
|
import { BENCHMARKS_DIR } from './constants'
|
||||||
import { datasets } from './datasets'
|
import { datasets } from './datasets'
|
||||||
import { models } from './evaluate'
|
import { createProgressBar, ensureDir, tokenize } from './utils'
|
||||||
import { createProgressBar, ensureDir, saveJsonFile, tokenize } from './utils'
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate per-format statistics from evaluation results
|
* Calculate per-format statistics from evaluation results
|
||||||
@@ -63,8 +51,8 @@ export function generateMarkdownReport(
|
|||||||
const json = formatResults.find(r => r.format === 'json')
|
const json = formatResults.find(r => r.format === 'json')
|
||||||
|
|
||||||
// Build model-by-model breakdown with ASCII bars
|
// Build model-by-model breakdown with ASCII bars
|
||||||
const modelCount = Object.keys(models).length
|
const modelNames = [...new Set(results.map(r => r.model))].reverse()
|
||||||
const modelNames = Object.keys(models)
|
const modelCount = modelNames.length
|
||||||
|
|
||||||
const modelBreakdown = modelNames.map((modelName, i) => {
|
const modelBreakdown = modelNames.map((modelName, i) => {
|
||||||
const modelResults = formatResults.map((fr) => {
|
const modelResults = formatResults.map((fr) => {
|
||||||
@@ -136,7 +124,7 @@ export function generateMarkdownReport(
|
|||||||
})
|
})
|
||||||
|
|
||||||
const tableRows = datasetResults.slice(0, 6).map(result =>
|
const tableRows = datasetResults.slice(0, 6).map(result =>
|
||||||
`| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.tokens.toLocaleString()} | ${result.correctCount}/${result.totalCount} |`,
|
`| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.tokens.toLocaleString('en-US')} | ${result.correctCount}/${result.totalCount} |`,
|
||||||
).join('\n')
|
).join('\n')
|
||||||
|
|
||||||
return `
|
return `
|
||||||
@@ -180,6 +168,27 @@ ${tableRows}
|
|||||||
// Calculate total unique questions
|
// Calculate total unique questions
|
||||||
const totalQuestions = [...new Set(results.map(r => r.questionId))].length
|
const totalQuestions = [...new Set(results.map(r => r.questionId))].length
|
||||||
|
|
||||||
|
// Calculate question type distribution
|
||||||
|
const fieldRetrievalCount = questions.filter(q => q.type === 'field-retrieval').length
|
||||||
|
const aggregationCount = questions.filter(q => q.type === 'aggregation').length
|
||||||
|
const filteringCount = questions.filter(q => q.type === 'filtering').length
|
||||||
|
|
||||||
|
const fieldRetrievalPercent = ((fieldRetrievalCount / totalQuestions) * 100).toFixed(0)
|
||||||
|
const aggregationPercent = ((aggregationCount / totalQuestions) * 100).toFixed(0)
|
||||||
|
const filteringPercent = ((filteringCount / totalQuestions) * 100).toFixed(0)
|
||||||
|
|
||||||
|
// Calculate dataset sizes
|
||||||
|
const tabularSize = datasets.find(d => d.name === 'tabular')?.data.employees?.length || 0
|
||||||
|
const nestedSize = datasets.find(d => d.name === 'nested')?.data.orders?.length || 0
|
||||||
|
const analyticsSize = datasets.find(d => d.name === 'analytics')?.data.metrics?.length || 0
|
||||||
|
const githubSize = datasets.find(d => d.name === 'github')?.data.repositories?.length || 0
|
||||||
|
|
||||||
|
// Calculate number of formats and models
|
||||||
|
const formatCount = formatResults.length
|
||||||
|
const modelsUsed = [...new Set(results.map(r => r.model))]
|
||||||
|
const modelsListStr = modelsUsed.map(m => `\`${m}\``).join(', ')
|
||||||
|
const totalEvaluations = totalQuestions * formatCount * modelsUsed.length
|
||||||
|
|
||||||
return `
|
return `
|
||||||
### Retrieval Accuracy
|
### Retrieval Accuracy
|
||||||
|
|
||||||
@@ -213,39 +222,41 @@ This benchmark tests **LLM comprehension and data retrieval accuracy** across di
|
|||||||
|
|
||||||
Four datasets designed to test different structural patterns:
|
Four datasets designed to test different structural patterns:
|
||||||
|
|
||||||
1. **Tabular** (100 employee records): Uniform objects with identical fields – optimal for TOON's tabular format.
|
1. **Tabular** (${tabularSize} employee records): Uniform objects with identical fields – optimal for TOON's tabular format.
|
||||||
2. **Nested** (50 e-commerce orders): Complex structures with nested customer objects and item arrays.
|
2. **Nested** (${nestedSize} e-commerce orders): Complex structures with nested customer objects and item arrays.
|
||||||
3. **Analytics** (60 days of metrics): Time-series data with dates and numeric values.
|
3. **Analytics** (${analyticsSize} days of metrics): Time-series data with dates and numeric values.
|
||||||
4. **GitHub** (100 repositories): Real-world data from top GitHub repos by stars.
|
4. **GitHub** (${githubSize} repositories): Real-world data from top GitHub repos by stars.
|
||||||
|
|
||||||
#### Question Types
|
#### Question Types
|
||||||
|
|
||||||
${totalQuestions} questions are generated dynamically across three categories:
|
${totalQuestions} questions are generated dynamically across three categories:
|
||||||
|
|
||||||
- **Field retrieval (50%)**: Direct value lookups
|
\- **Field retrieval (${fieldRetrievalPercent}%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths)
|
||||||
- Example: "What is Alice's salary?" → \`75000\`
|
- Example: "What is Alice's salary?" → \`75000\`
|
||||||
|
- Example: "How many items are in order ORD-0042?" → \`3\`
|
||||||
- Example: "What is the customer name for order ORD-0042?" → \`John Doe\`
|
- Example: "What is the customer name for order ORD-0042?" → \`John Doe\`
|
||||||
|
|
||||||
- **Aggregation (25%)**: Counting and summation tasks
|
- **Aggregation (${aggregationPercent}%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons)
|
||||||
- Example: "How many employees work in Engineering?" → \`17\`
|
- Example: "How many employees work in Engineering?" → \`17\`
|
||||||
- Example: "What is the total revenue across all orders?" → \`45123.50\`
|
- Example: "What is the total revenue across all orders?" → \`45123.50\`
|
||||||
|
- Example: "How many employees have salary > 80000?" → \`23\`
|
||||||
|
|
||||||
- **Filtering (25%)**: Conditional queries
|
- **Filtering (${filteringPercent}%)**: Multi-condition queries requiring compound logic (AND constraints across fields)
|
||||||
- Example: "How many employees in Sales have salary > 80000?" → \`5\`
|
- Example: "How many employees in Sales have salary > 80000?" → \`5\`
|
||||||
- Example: "How many orders have total > 400?" → \`12\`
|
- Example: "How many active employees have more than 10 years of experience?" → \`8\`
|
||||||
|
|
||||||
#### Evaluation Process
|
#### Evaluation Process
|
||||||
|
|
||||||
1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, JSON, YAML, CSV, XML).
|
1. **Format conversion:** Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => f.format.toUpperCase()).join(', ')}).
|
||||||
2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
|
2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
|
||||||
4. **Validate with LLM-as-judge**: \`gpt-5-nano\` validates if the answer is semantically correct (e.g., \`50000\` = \`$50,000\`, \`Engineering\` = \`engineering\`, \`2025-01-01\` = \`January 1, 2025\`).
|
3. **Validate with LLM-as-judge**: \`gpt-5-nano\` validates if the answer is semantically correct (e.g., \`50000\` = \`$50,000\`, \`Engineering\` = \`engineering\`, \`2025-01-01\` = \`January 1, 2025\`).
|
||||||
|
|
||||||
#### Models & Configuration
|
#### Models & Configuration
|
||||||
|
|
||||||
- **Models tested**: \`gpt-5-nano\`, \`claude-haiku-4-5\`, \`gemini-2.5-flash\`
|
- **Models tested**: ${modelsListStr}
|
||||||
- **Token counting**: Using \`gpt-tokenizer\` with \`o200k_base\` encoding (GPT-5 tokenizer)
|
- **Token counting**: Using \`gpt-tokenizer\` with \`o200k_base\` encoding (GPT-5 tokenizer)
|
||||||
- **Temperature**: 0 (for non-reasoning models)
|
- **Temperature**: 0 (for non-reasoning models)
|
||||||
- **Total evaluations**: 159 questions × 5 formats × 3 models = 2,385 LLM calls
|
- **Total evaluations**: ${totalQuestions} questions × ${formatCount} formats × ${modelsUsed.length} models = ${totalEvaluations.toLocaleString('en-US')} LLM calls
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
`.trimStart()
|
`.trimStart()
|
||||||
@@ -272,6 +283,10 @@ export function calculateTokenCounts(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Save results to disk
|
* Save results to disk
|
||||||
|
*
|
||||||
|
* @remarks
|
||||||
|
* Per-model results are managed separately via storage.ts
|
||||||
|
* This function only generates the aggregated markdown report
|
||||||
*/
|
*/
|
||||||
export async function saveResults(
|
export async function saveResults(
|
||||||
results: EvaluationResult[],
|
results: EvaluationResult[],
|
||||||
@@ -279,31 +294,12 @@ export async function saveResults(
   questions: Question[],
   tokenCounts: Record<string, number>,
 ): Promise<string> {
-  const resultsDir = path.join(BENCHMARKS_DIR, 'results', 'accuracy')
+  const resultsDir = path.join(BENCHMARKS_DIR, 'results')
   await ensureDir(resultsDir)
 
-  // Save raw results
-  await saveJsonFile(path.join(resultsDir, 'raw-results.json'), results)
-
-  // Save summary
-  await saveJsonFile(
-    path.join(resultsDir, 'summary.json'),
-    {
-      formatResults,
-      questions: questions.length,
-      models: Object.keys(models),
-      datasets: datasets.map(d => ({ name: d.name, description: d.description })),
-      tokenCounts,
-      timestamp: new Date().toISOString(),
-    },
-  )
-
-  // Generate markdown report
+  // Generate markdown report from all available model results
   const report = generateMarkdownReport(formatResults, results, questions, tokenCounts)
-  await fsp.writeFile(
-    path.join(resultsDir, 'report.md'),
-    report,
-  )
+  await fsp.writeFile(path.join(resultsDir, 'retrieval-accuracy.md'), report)
 
   return resultsDir
 }
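The rewritten `saveResults` reduces the on-disk output to a single, stable report path (`benchmarks/results/retrieval-accuracy.md`), which is easy to link from the README. A self-contained sketch of the same write path, assuming `ensureDir` is a recursive-mkdir helper as in the repo:

```ts
import * as fsp from 'node:fs/promises'
import * as path from 'node:path'

// Assumed equivalent of the repo's ensureDir helper.
async function ensureDir(dir: string): Promise<void> {
  await fsp.mkdir(dir, { recursive: true })
}

const resultsDir = path.join('benchmarks', 'results')
await ensureDir(resultsDir)
await fsp.writeFile(path.join(resultsDir, 'retrieval-accuracy.md'), '# Retrieval accuracy\n')
```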
benchmarks/src/storage.ts (new file)
@@ -0,0 +1,46 @@
+import type { Storage, StorageValue } from 'unstorage'
+import type { EvaluationResult } from './types'
+import * as path from 'node:path'
+import { createStorage } from 'unstorage'
+import fsDriver from 'unstorage/drivers/fs'
+import { BENCHMARKS_DIR } from './constants'
+
+/**
+ * Storage instance for model results
+ *
+ * @remarks
+ * Stores results in: `benchmarks/results/accuracy/models/`
+ */
+export const resultsStorage: Storage<StorageValue> = createStorage({
+  driver: fsDriver({
+    base: path.join(BENCHMARKS_DIR, 'results', 'accuracy', 'models'),
+  }),
+})
+
+export async function loadModelResults(modelId: string): Promise<EvaluationResult[] | undefined> {
+  const data = await resultsStorage.getItem<EvaluationResult[]>(modelId)
+  return data ?? undefined
+}
+
+export async function saveModelResults(modelId: string, results: EvaluationResult[]): Promise<void> {
+  await resultsStorage.setItem(modelId, results)
+}
+
+export async function getAllModelResults(): Promise<Record<string, EvaluationResult[]>> {
+  const keys = await resultsStorage.getKeys()
+  const results: Record<string, EvaluationResult[]> = {}
+
+  await Promise.all(
+    keys.map(async (modelId) => {
+      const data = await resultsStorage.getItem<EvaluationResult[]>(modelId)
+      if (data)
+        results[modelId] = data
+    }),
+  )
+
+  return results
+}
+
+export async function hasModelResults(modelId: string): Promise<boolean> {
+  return await resultsStorage.hasItem(modelId)
+}
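The new storage module makes runs resumable per model: results that already exist on disk can be reused instead of re-querying the API. A hedged usage sketch; `runModelEvaluation` is a hypothetical stand-in for the benchmark's eval loop, not part of this diff:

```ts
import type { EvaluationResult } from './types'
import { getAllModelResults, hasModelResults, saveModelResults } from './storage'

// Hypothetical per-model eval entry point (not part of this diff).
declare function runModelEvaluation(modelId: string): Promise<EvaluationResult[]>

for (const modelId of ['gpt-5-nano', 'claude-haiku-4-5', 'gemini-2.5-flash']) {
  if (await hasModelResults(modelId))
    continue // already cached under benchmarks/results/accuracy/models/
  await saveModelResults(modelId, await runModelEvaluation(modelId))
}

// The aggregated markdown report can then draw on every stored run.
const byModel = await getAllModelResults()
console.log(Object.keys(byModel))
```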
@@ -1,13 +1,3 @@
-/**
- * Shared utility functions for TOON benchmarks
- *
- * Provides common functionality used across multiple benchmark scripts:
- * - Progress bar visualization
- * - Token counting
- * - File I/O operations
- * - Retry logic for API calls
- */
-
 import * as fsp from 'node:fs/promises'
 import { encode } from 'gpt-tokenizer'
 
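For the token counts quoted in the methodology, `gpt-tokenizer` with the `o200k_base` encoding is enough on its own. A minimal sketch; the encoding-specific subpath import follows the package's documented usage, and the sample payloads are made up:

```ts
import { encode } from 'gpt-tokenizer/encoding/o200k_base'

// Number of o200k_base tokens in a serialized payload.
function countTokens(payload: string): number {
  return encode(payload).length
}

const toon = 'repositories[2]{id,name}:\n  1,toon\n  2,json'
const json = '{"repositories":[{"id":1,"name":"toon"},{"id":2,"name":"json"}]}'
console.log({ toon: countTokens(toon), json: countTokens(json) })
```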
pnpm-lock.yaml (generated)
@@ -56,15 +56,15 @@ importers:
       '@antfu/eslint-config':
         specifier: ^6.1.0
         version: 6.1.0(@vue/compiler-sfc@3.5.22)(eslint@9.38.0(jiti@2.6.1))(typescript@5.9.3)(vitest@4.0.3(@types/debug@4.1.12)(@types/node@24.9.1)(jiti@2.6.1)(tsx@4.20.6)(yaml@2.8.1))
+      '@clack/prompts':
+        specifier: ^0.11.0
+        version: 0.11.0
       '@faker-js/faker':
         specifier: ^10.1.0
         version: 10.1.0
       ai:
         specifier: ^5.0.80
         version: 5.0.80(zod@4.1.12)
-      consola:
-        specifier: ^3.4.2
-        version: 3.4.2
       csv-stringify:
         specifier: ^6.6.0
         version: 6.6.0
@@ -80,6 +80,12 @@ importers:
       p-map:
         specifier: ^7.0.3
         version: 7.0.3
+      p-queue:
+        specifier: ^9.0.0
+        version: 9.0.0
+      unstorage:
+        specifier: ^1.17.1
+        version: 1.17.1
       yaml:
         specifier: ^2.8.1
         version: 2.8.1
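Of the new direct dependencies, `unstorage` backs the result cache in `storage.ts` and `p-queue` is the obvious tool for pacing the benchmark's thousands of LLM calls. A hedged sketch of rate-limited querying; the concurrency numbers and the `askModel` helper are assumptions, not the repo's configuration:

```ts
import PQueue from 'p-queue'

// At most 4 requests in flight, and no more than 10 started per second.
const queue = new PQueue({ concurrency: 4, interval: 1000, intervalCap: 10 })

// Hypothetical single-question call into one of the benchmarked models.
declare function askModel(modelId: string, prompt: string): Promise<string>

const prompts = ['How many employees work in Engineering?']
const answers = await Promise.all(
  prompts.map(prompt => queue.add(() => askModel('gpt-5-nano', prompt))),
)
console.log(answers)
```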
@@ -985,6 +991,10 @@ packages:
     resolution: {integrity: sha512-HqZ5rWlFjGiV0tDm3UxxgNRqsOTniqoKZu0pIAfh7TZQMGuZK+hH0drySty0si0QXj1ieop4+SkSfPZBPPkHig==}
     engines: {node: '>=14'}
 
+  anymatch@3.1.3:
+    resolution: {integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==}
+    engines: {node: '>= 8'}
+
   are-docs-informative@0.0.2:
     resolution: {integrity: sha512-ixiS0nLNNG5jNQzgZJNoUpBKdo9yTYZMGJ+QgT2jmjR7G7+QHRCc4v6LQ3NgE7EBJq+o0ams3waJwkrlBom8Ig==}
     engines: {node: '>=14'}
@@ -1119,6 +1129,9 @@ packages:
     resolution: {integrity: sha512-5IKcdX0nnYavi6G7TtOhwkYzyjfJlatbjMjuLSfE2kYT5pMDOilZ4OvMhi637CcDICTmz3wARPoyhqyX1Y+XvA==}
     engines: {node: ^14.18.0 || >=16.10.0}
 
+  cookie-es@1.2.2:
+    resolution: {integrity: sha512-+W7VmiVINB+ywl1HGXJXmrqkOhpKrIiVZV6tQuV54ZyQC7MMuBt81Vc336GMLoHBq5hV/F9eXgt5Mnx0Rha5Fg==}
+
   core-js-compat@3.46.0:
     resolution: {integrity: sha512-p9hObIIEENxSV8xIu+V68JjSeARg6UVMG5mR+JEUguG3sI6MsiS1njz2jHmyJDvA+8jX/sytkBHup6kxhM9law==}
 
@@ -1126,6 +1139,9 @@ packages:
     resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
     engines: {node: '>= 8'}
 
+  crossws@0.3.5:
+    resolution: {integrity: sha512-ojKiDvcmByhwa8YYqbQI/hg7MEU0NC03+pSdEq4ZUnZR9xXpwk7E43SMNGkn+JxJGPFtNvQ48+vV2p+P1ml5PA==}
+
   cssesc@3.0.0:
     resolution: {integrity: sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==}
     engines: {node: '>=4'}
@@ -1431,6 +1447,9 @@ packages:
     resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==}
     engines: {node: '>=0.10.0'}
 
+  eventemitter3@5.0.1:
+    resolution: {integrity: sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==}
+
   eventsource-parser@3.0.6:
     resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==}
     engines: {node: '>=18.0.0'}
@@ -1552,6 +1571,9 @@ packages:
   graphemer@1.4.0:
     resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==}
 
+  h3@1.15.4:
+    resolution: {integrity: sha512-z5cFQWDffyOe4vQ9xIqNfCZdV4p//vy6fBnr8Q1AWnVZ0teurKMG66rLj++TKwKPUP3u7iMUvrvKaEUiQw2QWQ==}
+
   has-flag@4.0.0:
     resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==}
     engines: {node: '>=8'}
@@ -1582,6 +1604,9 @@ packages:
     resolution: {integrity: sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg==}
     engines: {node: '>=12'}
 
+  iron-webcrypto@1.2.1:
+    resolution: {integrity: sha512-feOM6FaSr6rEABp/eDfVseKyTMDt+KGpeB35SkVn9Tyn0CqvVsY3EwI0v5i8nMHyJnzCIQf7nsy3p41TPkJZhg==}
+
   is-builtin-module@5.0.0:
     resolution: {integrity: sha512-f4RqJKBUe5rQkJ2eJEJBXSticB3hGbN9j0yxxMQFqIW89Jp9WYFtzfTcRlstDKVUTRzSOTLKRfO9vIztenwtxA==}
     engines: {node: '>=18.20'}
@@ -1680,6 +1705,9 @@ packages:
   longest-streak@3.1.0:
     resolution: {integrity: sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==}
 
+  lru-cache@10.4.3:
+    resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
+
   magic-string@0.30.21:
     resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==}
 
@@ -1854,9 +1882,16 @@ packages:
   node-fetch-native@1.6.7:
     resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==}
 
+  node-mock-http@1.0.3:
+    resolution: {integrity: sha512-jN8dK25fsfnMrVsEhluUTPkBFY+6ybu7jSB1n+ri/vOGjJxU8J9CZhpSGkHXSkFjtUhbmoncG/YG9ta5Ludqog==}
+
   node-releases@2.0.26:
     resolution: {integrity: sha512-S2M9YimhSjBSvYnlr5/+umAnPHE++ODwt5e2Ij6FoX45HA/s4vHdkDx1eax2pAPeAOqu4s9b7ppahsyEFdVqQA==}
 
+  normalize-path@3.0.0:
+    resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==}
+    engines: {node: '>=0.10.0'}
+
   nth-check@2.1.1:
     resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==}
 
@@ -1890,6 +1925,14 @@ packages:
     resolution: {integrity: sha512-VkndIv2fIB99swvQoA65bm+fsmt6UNdGeIB0oxBs+WhAhdh08QA04JXpI7rbB9r08/nkbysKoya9rtDERYOYMA==}
     engines: {node: '>=18'}
 
+  p-queue@9.0.0:
+    resolution: {integrity: sha512-KO1RyxstL9g1mK76530TExamZC/S2Glm080Nx8PE5sTd7nlduDQsAfEl4uXX+qZjLiwvDauvzXavufy3+rJ9zQ==}
+    engines: {node: '>=20'}
+
+  p-timeout@7.0.1:
+    resolution: {integrity: sha512-AxTM2wDGORHGEkPCt8yqxOTMgpfbEHqF51f/5fJCmwFC3C/zNcGT63SymH2ttOAaiIws2zVg4+izQCjrakcwHg==}
+    engines: {node: '>=20'}
+
   package-manager-detector@1.5.0:
     resolution: {integrity: sha512-uBj69dVlYe/+wxj8JOpr97XfsxH/eumMt6HqjNTmJDf/6NO9s+0uxeOneIz3AsPt2m6y9PqzDzd3ATcU17MNfw==}
 
@@ -1967,6 +2010,9 @@ packages:
   queue-microtask@1.2.3:
     resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
 
+  radix3@1.1.2:
+    resolution: {integrity: sha512-b484I/7b8rDEdSDKckSSBA8knMpcdsXudlE/LNL639wFoHKwLbEkQFZHWEYwDC0wa0FKUcCY+GAF73Z7wxNVFA==}
+
   rc9@2.1.2:
     resolution: {integrity: sha512-btXCnMmRIBINM2LDZoEmOogIZU7Qe7zn4BpomSKZ/ykbLObuBdvG+mFq11DL6fjH1DRwHhrlgtYWG96bJiC7Cg==}
 
@@ -2194,6 +2240,9 @@ packages:
   unconfig@7.3.3:
     resolution: {integrity: sha512-QCkQoOnJF8L107gxfHL0uavn7WD9b3dpBcFX6HtfQYmjw2YzWxGuFQ0N0J6tE9oguCBJn9KOvfqYDCMPHIZrBA==}
 
+  uncrypto@0.1.3:
+    resolution: {integrity: sha512-Ql87qFHB3s/De2ClA9e0gsnS6zXG27SkTiSJwjCc9MebbfapQfuPzumMIUMi38ezPZVNFcHI9sUIepeQfw8J8Q==}
+
   undici-types@7.16.0:
     resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==}
 
@@ -2214,6 +2263,68 @@ packages:
     engines: {node: '>=20.19.0'}
     hasBin: true
 
+  unstorage@1.17.1:
+    resolution: {integrity: sha512-KKGwRTT0iVBCErKemkJCLs7JdxNVfqTPc/85ae1XES0+bsHbc/sFBfVi5kJp156cc51BHinIH2l3k0EZ24vOBQ==}
+    peerDependencies:
+      '@azure/app-configuration': ^1.8.0
+      '@azure/cosmos': ^4.2.0
+      '@azure/data-tables': ^13.3.0
+      '@azure/identity': ^4.6.0
+      '@azure/keyvault-secrets': ^4.9.0
+      '@azure/storage-blob': ^12.26.0
+      '@capacitor/preferences': ^6.0.3 || ^7.0.0
+      '@deno/kv': '>=0.9.0'
+      '@netlify/blobs': ^6.5.0 || ^7.0.0 || ^8.1.0 || ^9.0.0 || ^10.0.0
+      '@planetscale/database': ^1.19.0
+      '@upstash/redis': ^1.34.3
+      '@vercel/blob': '>=0.27.1'
+      '@vercel/functions': ^2.2.12 || ^3.0.0
+      '@vercel/kv': ^1.0.1
+      aws4fetch: ^1.0.20
+      db0: '>=0.2.1'
+      idb-keyval: ^6.2.1
+      ioredis: ^5.4.2
+      uploadthing: ^7.4.4
+    peerDependenciesMeta:
+      '@azure/app-configuration':
+        optional: true
+      '@azure/cosmos':
+        optional: true
+      '@azure/data-tables':
+        optional: true
+      '@azure/identity':
+        optional: true
+      '@azure/keyvault-secrets':
+        optional: true
+      '@azure/storage-blob':
+        optional: true
+      '@capacitor/preferences':
+        optional: true
+      '@deno/kv':
+        optional: true
+      '@netlify/blobs':
+        optional: true
+      '@planetscale/database':
+        optional: true
+      '@upstash/redis':
+        optional: true
+      '@vercel/blob':
+        optional: true
+      '@vercel/functions':
+        optional: true
+      '@vercel/kv':
+        optional: true
+      aws4fetch:
+        optional: true
+      db0:
+        optional: true
+      idb-keyval:
+        optional: true
+      ioredis:
+        optional: true
+      uploadthing:
+        optional: true
+
   untyped@2.0.0:
     resolution: {integrity: sha512-nwNCjxJTjNuLCgFr42fEak5OcLuB3ecca+9ksPFNvtfYSLpjf+iJqSIaSnIile6ZPbKYxI5k2AfXqeopGudK/g==}
     hasBin: true
@@ -3143,6 +3254,11 @@ snapshots:
 
   ansis@4.2.0: {}
 
+  anymatch@3.1.3:
+    dependencies:
+      normalize-path: 3.0.0
+      picomatch: 2.3.1
+
   are-docs-informative@0.0.2: {}
 
   argparse@2.0.1: {}
@@ -3289,6 +3405,8 @@ snapshots:
 
   consola@3.4.2: {}
 
+  cookie-es@1.2.2: {}
+
   core-js-compat@3.46.0:
     dependencies:
       browserslist: 4.27.0
@@ -3299,6 +3417,10 @@ snapshots:
       shebang-command: 2.0.0
       which: 2.0.2
 
+  crossws@0.3.5:
+    dependencies:
+      uncrypto: 0.1.3
+
   cssesc@3.0.0: {}
 
   csv-stringify@6.6.0: {}
@@ -3674,6 +3796,8 @@ snapshots:
 
   esutils@2.0.3: {}
 
+  eventemitter3@5.0.1: {}
+
   eventsource-parser@3.0.6: {}
 
   expect-type@1.2.2: {}
@@ -3776,6 +3900,18 @@ snapshots:
 
   graphemer@1.4.0: {}
 
+  h3@1.15.4:
+    dependencies:
+      cookie-es: 1.2.2
+      crossws: 0.3.5
+      defu: 6.1.4
+      destr: 2.0.5
+      iron-webcrypto: 1.2.1
+      node-mock-http: 1.0.3
+      radix3: 1.1.2
+      ufo: 1.6.1
+      uncrypto: 0.1.3
+
   has-flag@4.0.0: {}
 
   hookable@5.5.3: {}
@@ -3795,6 +3931,8 @@ snapshots:
 
   indent-string@5.0.0: {}
 
+  iron-webcrypto@1.2.1: {}
+
   is-builtin-module@5.0.0:
     dependencies:
       builtin-modules: 5.0.0
@@ -3871,6 +4009,8 @@ snapshots:
 
   longest-streak@3.1.0: {}
 
+  lru-cache@10.4.3: {}
+
   magic-string@0.30.21:
     dependencies:
       '@jridgewell/sourcemap-codec': 1.5.5
@@ -4228,8 +4368,12 @@ snapshots:
 
   node-fetch-native@1.6.7: {}
 
+  node-mock-http@1.0.3: {}
+
   node-releases@2.0.26: {}
 
+  normalize-path@3.0.0: {}
+
   nth-check@2.1.1:
     dependencies:
       boolbase: 1.0.0
@@ -4271,6 +4415,13 @@ snapshots:
 
   p-map@7.0.3: {}
 
+  p-queue@9.0.0:
+    dependencies:
+      eventemitter3: 5.0.1
+      p-timeout: 7.0.1
+
+  p-timeout@7.0.1: {}
+
   package-manager-detector@1.5.0: {}
 
   parent-module@1.0.1:
@@ -4336,6 +4487,8 @@ snapshots:
 
   queue-microtask@1.2.3: {}
 
+  radix3@1.1.2: {}
+
   rc9@2.1.2:
     dependencies:
       defu: 6.1.4
@@ -4575,6 +4728,8 @@ snapshots:
       jiti: 2.6.1
       quansync: 0.2.11
 
+  uncrypto@0.1.3: {}
+
   undici-types@7.16.0: {}
 
   unist-util-is@6.0.1:
@@ -4602,6 +4757,17 @@ snapshots:
       rolldown: 1.0.0-beta.44
       synckit: 0.11.11
 
+  unstorage@1.17.1:
+    dependencies:
+      anymatch: 3.1.3
+      chokidar: 4.0.3
+      destr: 2.0.5
+      h3: 1.15.4
+      lru-cache: 10.4.3
+      node-fetch-native: 1.6.7
+      ofetch: 1.4.1
+      ufo: 1.6.1
+
   untyped@2.0.0:
     dependencies:
       citty: 0.1.6