docs: update benchmarks for v3 list item syntax

This commit is contained in:
Johann Schopplich
2025-11-24 16:35:44 +01:00
parent 14a8cb132e
commit 9a6125424c
5 changed files with 100 additions and 101 deletions

View File

@@ -198,14 +198,14 @@ Benchmarks test LLM comprehension across different input formats using 209 data
Each format's overall performance, balancing accuracy against token cost:
```
TOON ████████████████████ 26.8 │ 73.9% acc │ 2,759 tokens
TOON ████████████████████ 26.9 │ 73.9% acc │ 2,744 tokens
JSON compact █████████████████░░░ 22.9 │ 70.7% acc │ 3,081 tokens
YAML ██████████████░░░░░░ 18.6 │ 69.0% acc │ 3,719 tokens
JSON ███████████░░░░░░░░░ 15.3 │ 69.7% acc │ 4,545 tokens
XML ██████████░░░░░░░░░░ 13.0 │ 67.1% acc │ 5,167 tokens
```
TOON achieves **73.9%** accuracy (vs JSON's 69.7%) while using **39.3% fewer tokens**.
TOON achieves **73.9%** accuracy (vs JSON's 69.7%) while using **39.6% fewer tokens**.
**Note on CSV:** Excluded from ranking as it only supports 109 of 209 questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle.
@@ -248,7 +248,7 @@ grok-4-fast-non-reasoning
```
> [!TIP] Results Summary
> TOON achieves **73.9% accuracy** (vs JSON's 69.7%) while using **39.3% fewer tokens** on these datasets.
> TOON achieves **73.9% accuracy** (vs JSON's 69.7%) while using **39.6% fewer tokens** on these datasets.
<details>
<summary><strong>Performance by dataset, model, and question type</strong></summary>
@@ -280,7 +280,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `toon` | 81.1% | 7,282 | 133/164 |
| `toon` | 81.1% | 7,232 | 133/164 |
| `json-compact` | 76.8% | 6,794 | 126/164 |
| `yaml` | 75.6% | 8,347 | 124/164 |
| `json-pretty` | 76.2% | 10,713 | 125/164 |
@@ -313,7 +313,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `json-compact` | 63.3% | 4,819 | 76/120 |
| `toon` | 57.5% | 5,874 | 69/120 |
| `toon` | 57.5% | 5,799 | 69/120 |
| `json-pretty` | 59.2% | 6,797 | 71/120 |
| `yaml` | 48.3% | 5,827 | 58/120 |
| `xml` | 46.7% | 7,709 | 56/120 |
@@ -323,7 +323,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `json-compact` | 92.2% | 574 | 107/116 |
| `toon` | 95.7% | 671 | 111/116 |
| `toon` | 95.7% | 666 | 111/116 |
| `yaml` | 91.4% | 686 | 106/116 |
| `json-pretty` | 94.0% | 932 | 109/116 |
| `xml` | 92.2% | 1,018 | 107/116 |
@@ -367,7 +367,7 @@ grok-4-fast-non-reasoning
| ------ | -------- | ------ | ------------- |
| `csv` | 75.0% | 489 | 3/4 |
| `yaml` | 100.0% | 996 | 4/4 |
| `toon` | 100.0% | 1,039 | 4/4 |
| `toon` | 100.0% | 1,019 | 4/4 |
| `json-compact` | 75.0% | 790 | 3/4 |
| `xml` | 100.0% | 1,458 | 4/4 |
| `json-pretty` | 75.0% | 1,274 | 3/4 |
@@ -378,7 +378,7 @@ grok-4-fast-non-reasoning
| ------ | -------- | ------ | ------------- |
| `csv` | 100.0% | 329 | 4/4 |
| `xml` | 100.0% | 1,411 | 4/4 |
| `toon` | 75.0% | 1,003 | 3/4 |
| `toon` | 75.0% | 983 | 3/4 |
| `yaml` | 25.0% | 960 | 1/4 |
| `json-pretty` | 25.0% | 1,230 | 1/4 |
| `json-compact` | 0.0% | 755 | 0/4 |
@@ -514,34 +514,34 @@ Datasets with nested or semi-uniform structures. CSV excluded as it cannot prope
```
🛒 E-commerce orders with nested structures ┊ Tabular: 33%
TOON █████████████░░░░░░░ 73,271 tokens
├─ vs JSON (-32.7%) 108,806 tokens
├─ vs JSON compact (+6.2%) 68,975 tokens
├─ vs YAML (-13.6%) 84,780 tokens
└─ vs XML (-40.1%) 122,406 tokens
TOON █████████████░░░░░░░ 72,771 tokens
├─ vs JSON (-33.1%) 108,806 tokens
├─ vs JSON compact (+5.5%) 68,975 tokens
├─ vs YAML (-14.2%) 84,780 tokens
└─ vs XML (-40.5%) 122,406 tokens
🧾 Semi-uniform event logs ┊ Tabular: 50%
TOON █████████████████░░░ 155,211 tokens
├─ vs JSON (-13.9%) 180,176 tokens
├─ vs JSON compact (+21.5%) 127,731 tokens
├─ vs YAML (+0.5%) 154,505 tokens
└─ vs XML (-24.2%) 204,777 tokens
TOON █████████████████░░░ 153,211 tokens
├─ vs JSON (-15.0%) 180,176 tokens
├─ vs JSON compact (+19.9%) 127,731 tokens
├─ vs YAML (-0.8%) 154,505 tokens
└─ vs XML (-25.2%) 204,777 tokens
🧩 Deeply nested configuration ┊ Tabular: 0%
TOON ██████████████░░░░░░ 636 tokens
├─ vs JSON (-30.8%) 919 tokens
├─ vs JSON compact (+12.8%) 564 tokens
├─ vs YAML (-5.5%) 673 tokens
└─ vs XML (-36.9%) 1,008 tokens
TOON ██████████████░░░░░░ 631 tokens
├─ vs JSON (-31.3%) 919 tokens
├─ vs JSON compact (+11.9%) 564 tokens
├─ vs YAML (-6.2%) 673 tokens
└─ vs XML (-37.4%) 1,008 tokens
──────────────────────────────────── Total ────────────────────────────────────
TOON ████████████████░░░░ 229,118 tokens
├─ vs JSON (-21.0%) 289,901 tokens
├─ vs JSON compact (+16.1%) 197,270 tokens
├─ vs YAML (-4.5%) 239,958 tokens
└─ vs XML (-30.2%) 328,191 tokens
TOON ████████████████░░░░ 226,613 tokens
├─ vs JSON (-21.8%) 289,901 tokens
├─ vs JSON compact (+14.9%) 197,270 tokens
├─ vs YAML (-5.6%) 239,958 tokens
└─ vs XML (-31.0%) 328,191 tokens
```
#### Flat-Only Track

View File

@@ -36,14 +36,14 @@ Benchmarks test LLM comprehension across different input formats using 209 data
Each format's overall performance, balancing accuracy against token cost:
```
TOON ████████████████████ 26.8 │ 73.9% acc │ 2,759 tokens
TOON ████████████████████ 26.9 │ 73.9% acc │ 2,744 tokens
JSON compact █████████████████░░░ 22.9 │ 70.7% acc │ 3,081 tokens
YAML ██████████████░░░░░░ 18.6 │ 69.0% acc │ 3,719 tokens
JSON ███████████░░░░░░░░░ 15.3 │ 69.7% acc │ 4,545 tokens
XML ██████████░░░░░░░░░░ 13.0 │ 67.1% acc │ 5,167 tokens
```
TOON achieves **73.9%** accuracy (vs JSON's 69.7%) while using **39.3% fewer tokens**.
TOON achieves **73.9%** accuracy (vs JSON's 69.7%) while using **39.6% fewer tokens**.
**Note on CSV:** Excluded from ranking as it only supports 109 of 209 questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle.
@@ -86,7 +86,7 @@ grok-4-fast-non-reasoning
```
> [!TIP] Results Summary
> TOON achieves **73.9% accuracy** (vs JSON's 69.7%) while using **39.3% fewer tokens** on these datasets.
> TOON achieves **73.9% accuracy** (vs JSON's 69.7%) while using **39.6% fewer tokens** on these datasets.
<details>
<summary><strong>Performance by dataset, model, and question type</strong></summary>
@@ -118,7 +118,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `toon` | 81.1% | 7,282 | 133/164 |
| `toon` | 81.1% | 7,232 | 133/164 |
| `json-compact` | 76.8% | 6,794 | 126/164 |
| `yaml` | 75.6% | 8,347 | 124/164 |
| `json-pretty` | 76.2% | 10,713 | 125/164 |
@@ -151,7 +151,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `json-compact` | 63.3% | 4,819 | 76/120 |
| `toon` | 57.5% | 5,874 | 69/120 |
| `toon` | 57.5% | 5,799 | 69/120 |
| `json-pretty` | 59.2% | 6,797 | 71/120 |
| `yaml` | 48.3% | 5,827 | 58/120 |
| `xml` | 46.7% | 7,709 | 56/120 |
@@ -161,7 +161,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `json-compact` | 92.2% | 574 | 107/116 |
| `toon` | 95.7% | 671 | 111/116 |
| `toon` | 95.7% | 666 | 111/116 |
| `yaml` | 91.4% | 686 | 106/116 |
| `json-pretty` | 94.0% | 932 | 109/116 |
| `xml` | 92.2% | 1,018 | 107/116 |
@@ -205,7 +205,7 @@ grok-4-fast-non-reasoning
| ------ | -------- | ------ | ------------- |
| `csv` | 75.0% | 489 | 3/4 |
| `yaml` | 100.0% | 996 | 4/4 |
| `toon` | 100.0% | 1,039 | 4/4 |
| `toon` | 100.0% | 1,019 | 4/4 |
| `json-compact` | 75.0% | 790 | 3/4 |
| `xml` | 100.0% | 1,458 | 4/4 |
| `json-pretty` | 75.0% | 1,274 | 3/4 |
@@ -216,7 +216,7 @@ grok-4-fast-non-reasoning
| ------ | -------- | ------ | ------------- |
| `csv` | 100.0% | 329 | 4/4 |
| `xml` | 100.0% | 1,411 | 4/4 |
| `toon` | 75.0% | 1,003 | 3/4 |
| `toon` | 75.0% | 983 | 3/4 |
| `yaml` | 25.0% | 960 | 1/4 |
| `json-pretty` | 25.0% | 1,230 | 1/4 |
| `json-compact` | 0.0% | 755 | 0/4 |

View File

@@ -5,34 +5,34 @@ Datasets with nested or semi-uniform structures. CSV excluded as it cannot prope
```
🛒 E-commerce orders with nested structures ┊ Tabular: 33%
TOON █████████████░░░░░░░ 73,271 tokens
├─ vs JSON (-32.7%) 108,806 tokens
├─ vs JSON compact (+6.2%) 68,975 tokens
├─ vs YAML (-13.6%) 84,780 tokens
└─ vs XML (-40.1%) 122,406 tokens
TOON █████████████░░░░░░░ 72,771 tokens
├─ vs JSON (-33.1%) 108,806 tokens
├─ vs JSON compact (+5.5%) 68,975 tokens
├─ vs YAML (-14.2%) 84,780 tokens
└─ vs XML (-40.5%) 122,406 tokens
🧾 Semi-uniform event logs ┊ Tabular: 50%
TOON █████████████████░░░ 155,211 tokens
├─ vs JSON (-13.9%) 180,176 tokens
├─ vs JSON compact (+21.5%) 127,731 tokens
├─ vs YAML (+0.5%) 154,505 tokens
└─ vs XML (-24.2%) 204,777 tokens
TOON █████████████████░░░ 153,211 tokens
├─ vs JSON (-15.0%) 180,176 tokens
├─ vs JSON compact (+19.9%) 127,731 tokens
├─ vs YAML (-0.8%) 154,505 tokens
└─ vs XML (-25.2%) 204,777 tokens
🧩 Deeply nested configuration ┊ Tabular: 0%
TOON ██████████████░░░░░░ 636 tokens
├─ vs JSON (-30.8%) 919 tokens
├─ vs JSON compact (+12.8%) 564 tokens
├─ vs YAML (-5.5%) 673 tokens
└─ vs XML (-36.9%) 1,008 tokens
TOON ██████████████░░░░░░ 631 tokens
├─ vs JSON (-31.3%) 919 tokens
├─ vs JSON compact (+11.9%) 564 tokens
├─ vs YAML (-6.2%) 673 tokens
└─ vs XML (-37.4%) 1,008 tokens
──────────────────────────────────── Total ────────────────────────────────────
TOON ████████████████░░░░ 229,118 tokens
├─ vs JSON (-21.0%) 289,901 tokens
├─ vs JSON compact (+16.1%) 197,270 tokens
├─ vs YAML (-4.5%) 239,958 tokens
└─ vs XML (-30.2%) 328,191 tokens
TOON ████████████████░░░░ 226,613 tokens
├─ vs JSON (-21.8%) 289,901 tokens
├─ vs JSON compact (+14.9%) 197,270 tokens
├─ vs YAML (-5.6%) 239,958 tokens
└─ vs XML (-31.0%) 328,191 tokens
```
#### Flat-Only Track

View File

@@ -52,14 +52,14 @@ Benchmarks test LLM comprehension across different input formats using 209 data
Each format's overall performance, balancing accuracy against token cost:
```
TOON ████████████████████ 26.8 │ 73.9% acc │ 2,759 tokens
TOON ████████████████████ 26.9 │ 73.9% acc │ 2,744 tokens
JSON compact █████████████████░░░ 22.9 │ 70.7% acc │ 3,081 tokens
YAML ██████████████░░░░░░ 18.6 │ 69.0% acc │ 3,719 tokens
JSON ███████████░░░░░░░░░ 15.3 │ 69.7% acc │ 4,545 tokens
XML ██████████░░░░░░░░░░ 13.0 │ 67.1% acc │ 5,167 tokens
```
TOON achieves **73.9%** accuracy (vs JSON's 69.7%) while using **39.3% fewer tokens**.
TOON achieves **73.9%** accuracy (vs JSON's 69.7%) while using **39.6% fewer tokens**.
**Note on CSV:** Excluded from ranking as it only supports 109 of 209 questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle.
@@ -102,7 +102,7 @@ grok-4-fast-non-reasoning
```
> [!TIP] Results Summary
> TOON achieves **73.9% accuracy** (vs JSON's 69.7%) while using **39.3% fewer tokens** on these datasets.
> TOON achieves **73.9% accuracy** (vs JSON's 69.7%) while using **39.6% fewer tokens** on these datasets.
<details>
<summary><strong>Performance by dataset, model, and question type</strong></summary>
@@ -134,7 +134,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `toon` | 81.1% | 7,282 | 133/164 |
| `toon` | 81.1% | 7,232 | 133/164 |
| `json-compact` | 76.8% | 6,794 | 126/164 |
| `yaml` | 75.6% | 8,347 | 124/164 |
| `json-pretty` | 76.2% | 10,713 | 125/164 |
@@ -167,7 +167,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `json-compact` | 63.3% | 4,819 | 76/120 |
| `toon` | 57.5% | 5,874 | 69/120 |
| `toon` | 57.5% | 5,799 | 69/120 |
| `json-pretty` | 59.2% | 6,797 | 71/120 |
| `yaml` | 48.3% | 5,827 | 58/120 |
| `xml` | 46.7% | 7,709 | 56/120 |
@@ -177,7 +177,7 @@ grok-4-fast-non-reasoning
| Format | Accuracy | Tokens | Correct/Total |
| ------ | -------- | ------ | ------------- |
| `json-compact` | 92.2% | 574 | 107/116 |
| `toon` | 95.7% | 671 | 111/116 |
| `toon` | 95.7% | 666 | 111/116 |
| `yaml` | 91.4% | 686 | 106/116 |
| `json-pretty` | 94.0% | 932 | 109/116 |
| `xml` | 92.2% | 1,018 | 107/116 |
@@ -221,7 +221,7 @@ grok-4-fast-non-reasoning
| ------ | -------- | ------ | ------------- |
| `csv` | 75.0% | 489 | 3/4 |
| `yaml` | 100.0% | 996 | 4/4 |
| `toon` | 100.0% | 1,039 | 4/4 |
| `toon` | 100.0% | 1,019 | 4/4 |
| `json-compact` | 75.0% | 790 | 3/4 |
| `xml` | 100.0% | 1,458 | 4/4 |
| `json-pretty` | 75.0% | 1,274 | 3/4 |
@@ -232,7 +232,7 @@ grok-4-fast-non-reasoning
| ------ | -------- | ------ | ------------- |
| `csv` | 100.0% | 329 | 4/4 |
| `xml` | 100.0% | 1,411 | 4/4 |
| `toon` | 75.0% | 1,003 | 3/4 |
| `toon` | 75.0% | 983 | 3/4 |
| `yaml` | 25.0% | 960 | 1/4 |
| `json-pretty` | 25.0% | 1,230 | 1/4 |
| `json-compact` | 0.0% | 755 | 0/4 |
@@ -368,34 +368,34 @@ Datasets with nested or semi-uniform structures. CSV excluded as it cannot prope
```
🛒 E-commerce orders with nested structures ┊ Tabular: 33%
TOON █████████████░░░░░░░ 73,271 tokens
├─ vs JSON (-32.7%) 108,806 tokens
├─ vs JSON compact (+6.2%) 68,975 tokens
├─ vs YAML (-13.6%) 84,780 tokens
└─ vs XML (-40.1%) 122,406 tokens
TOON █████████████░░░░░░░ 72,771 tokens
├─ vs JSON (-33.1%) 108,806 tokens
├─ vs JSON compact (+5.5%) 68,975 tokens
├─ vs YAML (-14.2%) 84,780 tokens
└─ vs XML (-40.5%) 122,406 tokens
🧾 Semi-uniform event logs ┊ Tabular: 50%
TOON █████████████████░░░ 155,211 tokens
├─ vs JSON (-13.9%) 180,176 tokens
├─ vs JSON compact (+21.5%) 127,731 tokens
├─ vs YAML (+0.5%) 154,505 tokens
└─ vs XML (-24.2%) 204,777 tokens
TOON █████████████████░░░ 153,211 tokens
├─ vs JSON (-15.0%) 180,176 tokens
├─ vs JSON compact (+19.9%) 127,731 tokens
├─ vs YAML (-0.8%) 154,505 tokens
└─ vs XML (-25.2%) 204,777 tokens
🧩 Deeply nested configuration ┊ Tabular: 0%
TOON ██████████████░░░░░░ 636 tokens
├─ vs JSON (-30.8%) 919 tokens
├─ vs JSON compact (+12.8%) 564 tokens
├─ vs YAML (-5.5%) 673 tokens
└─ vs XML (-36.9%) 1,008 tokens
TOON ██████████████░░░░░░ 631 tokens
├─ vs JSON (-31.3%) 919 tokens
├─ vs JSON compact (+11.9%) 564 tokens
├─ vs YAML (-6.2%) 673 tokens
└─ vs XML (-37.4%) 1,008 tokens
──────────────────────────────────── Total ────────────────────────────────────
TOON ████████████████░░░░ 229,118 tokens
├─ vs JSON (-21.0%) 289,901 tokens
├─ vs JSON compact (+16.1%) 197,270 tokens
├─ vs YAML (-4.5%) 239,958 tokens
└─ vs XML (-30.2%) 328,191 tokens
TOON ████████████████░░░░ 226,613 tokens
├─ vs JSON (-21.8%) 289,901 tokens
├─ vs JSON compact (+14.9%) 197,270 tokens
├─ vs YAML (-5.6%) 239,958 tokens
└─ vs XML (-31.0%) 328,191 tokens
```
#### Flat-Only Track

View File

@@ -9,25 +9,25 @@ export function parseArrayHeaderLine(
content: string,
defaultDelimiter: Delimiter,
): { header: ArrayHeaderInfo, inlineValues?: string } | undefined {
const trimmed = content.trimStart()
const trimmedToken = content.trimStart()
// Find the bracket segment, accounting for quoted keys that may contain brackets
let bracketStart = -1
// For quoted keys, find bracket after closing quote (not inside the quoted string)
if (trimmed.startsWith(DOUBLE_QUOTE)) {
const closingQuoteIndex = findClosingQuote(trimmed, 0)
if (trimmedToken.startsWith(DOUBLE_QUOTE)) {
const closingQuoteIndex = findClosingQuote(trimmedToken, 0)
if (closingQuoteIndex === -1) {
return
}
const afterQuote = trimmed.slice(closingQuoteIndex + 1)
const afterQuote = trimmedToken.slice(closingQuoteIndex + 1)
if (!afterQuote.startsWith(OPEN_BRACKET)) {
return
}
// Calculate position in original content and find bracket after the quoted key
const leadingWhitespace = content.length - trimmed.length
const leadingWhitespace = content.length - trimmedToken.length
const keyEndIndex = leadingWhitespace + closingQuoteIndex + 1
bracketStart = content.indexOf(OPEN_BRACKET, keyEndIndex)
}
@@ -72,7 +72,6 @@ export function parseArrayHeaderLine(
}
const afterColon = content.slice(colonIndex + 1).trim()
const bracketContent = content.slice(bracketStart + 1, bracketEnd)
// Try to parse bracket segment
@@ -196,37 +195,37 @@ export function mapRowValuesToPrimitives(values: string[]): JsonPrimitive[] {
// #region Primitive and key parsing
export function parsePrimitiveToken(token: string): JsonPrimitive {
const trimmed = token.trim()
const trimmedToken = token.trim()
// Empty token
if (!trimmed) {
if (!trimmedToken) {
return ''
}
// Quoted string (if starts with quote, it MUST be properly quoted)
if (trimmed.startsWith(DOUBLE_QUOTE)) {
return parseStringLiteral(trimmed)
if (trimmedToken.startsWith(DOUBLE_QUOTE)) {
return parseStringLiteral(trimmedToken)
}
// Boolean or null literals
if (isBooleanOrNullLiteral(trimmed)) {
if (trimmed === TRUE_LITERAL)
if (isBooleanOrNullLiteral(trimmedToken)) {
if (trimmedToken === TRUE_LITERAL)
return true
if (trimmed === FALSE_LITERAL)
if (trimmedToken === FALSE_LITERAL)
return false
if (trimmed === NULL_LITERAL)
if (trimmedToken === NULL_LITERAL)
return null
}
// Numeric literal
if (isNumericLiteral(trimmed)) {
const parsedNumber = Number.parseFloat(trimmed)
if (isNumericLiteral(trimmedToken)) {
const parsedNumber = Number.parseFloat(trimmedToken)
// Normalize negative zero to positive zero
return Object.is(parsedNumber, -0) ? 0 : parsedNumber
}
// Unquoted string
return trimmed
return trimmedToken
}
export function parseStringLiteral(token: string): string {