mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 15:24:10 +08:00
docs: adjust padding for benchmark comparison
This commit is contained in:
80
README.md
80
README.md
@@ -58,29 +58,29 @@ The benchmarks test datasets that favor TOON's strengths (uniform tabular data).
|
|||||||
|
|
||||||
```
|
```
|
||||||
⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens
|
⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens
|
||||||
vs JSON (-42.3%) 15,145
|
vs JSON (-42.3%) 15,145
|
||||||
vs JSON compact (-23.7%) 11,455
|
vs JSON compact (-23.7%) 11,455
|
||||||
vs YAML (-33.4%) 13,129
|
vs YAML (-33.4%) 13,129
|
||||||
vs XML (-48.8%) 17,095
|
vs XML (-48.8%) 17,095
|
||||||
|
|
||||||
📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens
|
📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens
|
||||||
vs JSON (-58.9%) 10,977
|
vs JSON (-58.9%) 10,977
|
||||||
vs JSON compact (-35.7%) 7,013
|
vs JSON compact (-35.7%) 7,013
|
||||||
vs YAML (-48.8%) 8,810
|
vs YAML (-48.8%) 8,810
|
||||||
vs XML (-65.7%) 13,128
|
vs XML (-65.7%) 13,128
|
||||||
|
|
||||||
🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens
|
🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens
|
||||||
vs JSON (-35.4%) 257
|
vs JSON (-35.4%) 257
|
||||||
vs JSON compact (-2.9%) 171
|
vs JSON compact (-2.9%) 171
|
||||||
vs YAML (-15.7%) 197
|
vs YAML (-15.7%) 197
|
||||||
vs XML (-38.7%) 271
|
vs XML (-38.7%) 271
|
||||||
|
|
||||||
─────────────────────────────────────────────────────────────────────
|
─────────────────────────────────────────────────────────────────────
|
||||||
Total ██████████████░░░░░░░░░░░ 13,418 tokens
|
Total ██████████████░░░░░░░░░░░ 13,418 tokens
|
||||||
vs JSON (-49.1%) 26,379
|
vs JSON (-49.1%) 26,379
|
||||||
vs JSON compact (-28.0%) 18,639
|
vs JSON compact (-28.0%) 18,639
|
||||||
vs YAML (-39.4%) 22,136
|
vs YAML (-39.4%) 22,136
|
||||||
vs XML (-56.0%) 30,494
|
vs XML (-56.0%) 30,494
|
||||||
```
|
```
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
@@ -232,36 +232,36 @@ Accuracy across **4 LLMs** on 154 data retrieval questions:
|
|||||||
|
|
||||||
```
|
```
|
||||||
gpt-5-nano
|
gpt-5-nano
|
||||||
→ TOON ███████████████████░ 96.1% (148/154)
|
→ TOON ███████████████████░ 96.1% (148/154)
|
||||||
CSV ██████████████████░░ 91.6% (141/154)
|
CSV ██████████████████░░ 91.6% (141/154)
|
||||||
YAML ██████████████████░░ 91.6% (141/154)
|
YAML ██████████████████░░ 91.6% (141/154)
|
||||||
JSON compact ██████████████████░░ 91.6% (141/154)
|
JSON compact ██████████████████░░ 91.6% (141/154)
|
||||||
XML █████████████████░░░ 87.0% (134/154)
|
XML █████████████████░░░ 87.0% (134/154)
|
||||||
JSON █████████████████░░░ 86.4% (133/154)
|
JSON █████████████████░░░ 86.4% (133/154)
|
||||||
|
|
||||||
claude-haiku-4-5-20251001
|
claude-haiku-4-5-20251001
|
||||||
JSON ██████████░░░░░░░░░░ 50.0% (77/154)
|
JSON ██████████░░░░░░░░░░ 50.0% (77/154)
|
||||||
YAML ██████████░░░░░░░░░░ 49.4% (76/154)
|
YAML ██████████░░░░░░░░░░ 49.4% (76/154)
|
||||||
→ TOON ██████████░░░░░░░░░░ 48.7% (75/154)
|
→ TOON ██████████░░░░░░░░░░ 48.7% (75/154)
|
||||||
XML ██████████░░░░░░░░░░ 48.1% (74/154)
|
XML ██████████░░░░░░░░░░ 48.1% (74/154)
|
||||||
CSV █████████░░░░░░░░░░░ 47.4% (73/154)
|
CSV █████████░░░░░░░░░░░ 47.4% (73/154)
|
||||||
JSON compact █████████░░░░░░░░░░░ 44.2% (68/154)
|
JSON compact █████████░░░░░░░░░░░ 44.2% (68/154)
|
||||||
|
|
||||||
gemini-2.5-flash
|
gemini-2.5-flash
|
||||||
CSV ██████████████████░░ 87.7% (135/154)
|
CSV ██████████████████░░ 87.7% (135/154)
|
||||||
XML ██████████████████░░ 87.7% (135/154)
|
XML ██████████████████░░ 87.7% (135/154)
|
||||||
→ TOON █████████████████░░░ 86.4% (133/154)
|
→ TOON █████████████████░░░ 86.4% (133/154)
|
||||||
YAML ████████████████░░░░ 79.9% (123/154)
|
YAML ████████████████░░░░ 79.9% (123/154)
|
||||||
JSON compact ████████████████░░░░ 79.9% (123/154)
|
JSON compact ████████████████░░░░ 79.9% (123/154)
|
||||||
JSON ███████████████░░░░░ 76.6% (118/154)
|
JSON ███████████████░░░░░ 76.6% (118/154)
|
||||||
|
|
||||||
grok-4-fast-non-reasoning
|
grok-4-fast-non-reasoning
|
||||||
→ TOON ██████████░░░░░░░░░░ 49.4% (76/154)
|
→ TOON ██████████░░░░░░░░░░ 49.4% (76/154)
|
||||||
JSON ██████████░░░░░░░░░░ 48.7% (75/154)
|
JSON ██████████░░░░░░░░░░ 48.7% (75/154)
|
||||||
XML █████████░░░░░░░░░░░ 46.1% (71/154)
|
XML █████████░░░░░░░░░░░ 46.1% (71/154)
|
||||||
YAML █████████░░░░░░░░░░░ 46.1% (71/154)
|
YAML █████████░░░░░░░░░░░ 46.1% (71/154)
|
||||||
JSON compact █████████░░░░░░░░░░░ 45.5% (70/154)
|
JSON compact █████████░░░░░░░░░░░ 45.5% (70/154)
|
||||||
CSV █████████░░░░░░░░░░░ 44.2% (68/154)
|
CSV █████████░░░░░░░░░░░ 44.2% (68/154)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Key tradeoff:** TOON achieves **70.1% accuracy** (vs JSON's 65.4%) while using **46.3% fewer tokens** on these datasets.
|
**Key tradeoff:** TOON achieves **70.1% accuracy** (vs JSON's 65.4%) while using **46.3% fewer tokens** on these datasets.
|
||||||
|
|||||||
@@ -4,36 +4,36 @@ Accuracy across **4 LLMs** on 154 data retrieval questions:
|
|||||||
|
|
||||||
```
|
```
|
||||||
gpt-5-nano
|
gpt-5-nano
|
||||||
→ TOON ███████████████████░ 96.1% (148/154)
|
→ TOON ███████████████████░ 96.1% (148/154)
|
||||||
CSV ██████████████████░░ 91.6% (141/154)
|
CSV ██████████████████░░ 91.6% (141/154)
|
||||||
YAML ██████████████████░░ 91.6% (141/154)
|
YAML ██████████████████░░ 91.6% (141/154)
|
||||||
JSON compact ██████████████████░░ 91.6% (141/154)
|
JSON compact ██████████████████░░ 91.6% (141/154)
|
||||||
XML █████████████████░░░ 87.0% (134/154)
|
XML █████████████████░░░ 87.0% (134/154)
|
||||||
JSON █████████████████░░░ 86.4% (133/154)
|
JSON █████████████████░░░ 86.4% (133/154)
|
||||||
|
|
||||||
claude-haiku-4-5-20251001
|
claude-haiku-4-5-20251001
|
||||||
JSON ██████████░░░░░░░░░░ 50.0% (77/154)
|
JSON ██████████░░░░░░░░░░ 50.0% (77/154)
|
||||||
YAML ██████████░░░░░░░░░░ 49.4% (76/154)
|
YAML ██████████░░░░░░░░░░ 49.4% (76/154)
|
||||||
→ TOON ██████████░░░░░░░░░░ 48.7% (75/154)
|
→ TOON ██████████░░░░░░░░░░ 48.7% (75/154)
|
||||||
XML ██████████░░░░░░░░░░ 48.1% (74/154)
|
XML ██████████░░░░░░░░░░ 48.1% (74/154)
|
||||||
CSV █████████░░░░░░░░░░░ 47.4% (73/154)
|
CSV █████████░░░░░░░░░░░ 47.4% (73/154)
|
||||||
JSON compact █████████░░░░░░░░░░░ 44.2% (68/154)
|
JSON compact █████████░░░░░░░░░░░ 44.2% (68/154)
|
||||||
|
|
||||||
gemini-2.5-flash
|
gemini-2.5-flash
|
||||||
CSV ██████████████████░░ 87.7% (135/154)
|
CSV ██████████████████░░ 87.7% (135/154)
|
||||||
XML ██████████████████░░ 87.7% (135/154)
|
XML ██████████████████░░ 87.7% (135/154)
|
||||||
→ TOON █████████████████░░░ 86.4% (133/154)
|
→ TOON █████████████████░░░ 86.4% (133/154)
|
||||||
YAML ████████████████░░░░ 79.9% (123/154)
|
YAML ████████████████░░░░ 79.9% (123/154)
|
||||||
JSON compact ████████████████░░░░ 79.9% (123/154)
|
JSON compact ████████████████░░░░ 79.9% (123/154)
|
||||||
JSON ███████████████░░░░░ 76.6% (118/154)
|
JSON ███████████████░░░░░ 76.6% (118/154)
|
||||||
|
|
||||||
grok-4-fast-non-reasoning
|
grok-4-fast-non-reasoning
|
||||||
→ TOON ██████████░░░░░░░░░░ 49.4% (76/154)
|
→ TOON ██████████░░░░░░░░░░ 49.4% (76/154)
|
||||||
JSON ██████████░░░░░░░░░░ 48.7% (75/154)
|
JSON ██████████░░░░░░░░░░ 48.7% (75/154)
|
||||||
XML █████████░░░░░░░░░░░ 46.1% (71/154)
|
XML █████████░░░░░░░░░░░ 46.1% (71/154)
|
||||||
YAML █████████░░░░░░░░░░░ 46.1% (71/154)
|
YAML █████████░░░░░░░░░░░ 46.1% (71/154)
|
||||||
JSON compact █████████░░░░░░░░░░░ 45.5% (70/154)
|
JSON compact █████████░░░░░░░░░░░ 45.5% (70/154)
|
||||||
CSV █████████░░░░░░░░░░░ 44.2% (68/154)
|
CSV █████████░░░░░░░░░░░ 44.2% (68/154)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Key tradeoff:** TOON achieves **70.1% accuracy** (vs JSON's 65.4%) while using **46.3% fewer tokens** on these datasets.
|
**Key tradeoff:** TOON achieves **70.1% accuracy** (vs JSON's 65.4%) while using **46.3% fewer tokens** on these datasets.
|
||||||
|
|||||||
@@ -2,29 +2,29 @@
|
|||||||
|
|
||||||
```
|
```
|
||||||
⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens
|
⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens
|
||||||
vs JSON (-42.3%) 15,145
|
vs JSON (-42.3%) 15,145
|
||||||
vs JSON compact (-23.7%) 11,455
|
vs JSON compact (-23.7%) 11,455
|
||||||
vs YAML (-33.4%) 13,129
|
vs YAML (-33.4%) 13,129
|
||||||
vs XML (-48.8%) 17,095
|
vs XML (-48.8%) 17,095
|
||||||
|
|
||||||
📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens
|
📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens
|
||||||
vs JSON (-58.9%) 10,977
|
vs JSON (-58.9%) 10,977
|
||||||
vs JSON compact (-35.7%) 7,013
|
vs JSON compact (-35.7%) 7,013
|
||||||
vs YAML (-48.8%) 8,810
|
vs YAML (-48.8%) 8,810
|
||||||
vs XML (-65.7%) 13,128
|
vs XML (-65.7%) 13,128
|
||||||
|
|
||||||
🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens
|
🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens
|
||||||
vs JSON (-35.4%) 257
|
vs JSON (-35.4%) 257
|
||||||
vs JSON compact (-2.9%) 171
|
vs JSON compact (-2.9%) 171
|
||||||
vs YAML (-15.7%) 197
|
vs YAML (-15.7%) 197
|
||||||
vs XML (-38.7%) 271
|
vs XML (-38.7%) 271
|
||||||
|
|
||||||
─────────────────────────────────────────────────────────────────────
|
─────────────────────────────────────────────────────────────────────
|
||||||
Total ██████████████░░░░░░░░░░░ 13,418 tokens
|
Total ██████████████░░░░░░░░░░░ 13,418 tokens
|
||||||
vs JSON (-49.1%) 26,379
|
vs JSON (-49.1%) 26,379
|
||||||
vs JSON compact (-28.0%) 18,639
|
vs JSON compact (-28.0%) 18,639
|
||||||
vs YAML (-39.4%) 22,136
|
vs YAML (-39.4%) 22,136
|
||||||
vs XML (-56.0%) 30,494
|
vs XML (-56.0%) 30,494
|
||||||
```
|
```
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ const datasetRows = results
|
|||||||
const comparisonLines = formatOrder.map((formatName) => {
|
const comparisonLines = formatOrder.map((formatName) => {
|
||||||
const format = result.formats.find(f => f.name === formatName)!
|
const format = result.formats.find(f => f.name === formatName)!
|
||||||
const label = FORMATTER_DISPLAY_NAMES[formatName] || formatName.toUpperCase()
|
const label = FORMATTER_DISPLAY_NAMES[formatName] || formatName.toUpperCase()
|
||||||
const labelWithSavings = `vs ${label} (-${format.savingsPercent}%)`.padEnd(28)
|
const labelWithSavings = `vs ${label} (-${format.savingsPercent}%)`.padEnd(27)
|
||||||
const tokenStr = format.tokens.toLocaleString('en-US').padStart(6)
|
const tokenStr = format.tokens.toLocaleString('en-US').padStart(6)
|
||||||
return ` ${labelWithSavings}${tokenStr}`
|
return ` ${labelWithSavings}${tokenStr}`
|
||||||
})
|
})
|
||||||
@@ -140,7 +140,7 @@ const totalComparisonLines = formatOrder.map((formatName) => {
|
|||||||
const label = FORMATTER_DISPLAY_NAMES[formatName] || formatName.toUpperCase()
|
const label = FORMATTER_DISPLAY_NAMES[formatName] || formatName.toUpperCase()
|
||||||
const tokens = totalTokensByFormat[formatName]!
|
const tokens = totalTokensByFormat[formatName]!
|
||||||
const percent = totalSavingsPercent[formatName]!
|
const percent = totalSavingsPercent[formatName]!
|
||||||
const labelWithSavings = `vs ${label} (-${percent}%)`.padEnd(28)
|
const labelWithSavings = `vs ${label} (-${percent}%)`.padEnd(27)
|
||||||
const tokenStr = tokens.toLocaleString('en-US').padStart(6)
|
const tokenStr = tokens.toLocaleString('en-US').padStart(6)
|
||||||
return ` ${labelWithSavings}${tokenStr}`
|
return ` ${labelWithSavings}${tokenStr}`
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ export function generateMarkdownReport(
|
|||||||
const countString = `(${result.correctCount}/${result.totalCount})`
|
const countString = `(${result.correctCount}/${result.totalCount})`
|
||||||
const prefix = result.format === 'toon' ? '→ ' : ' '
|
const prefix = result.format === 'toon' ? '→ ' : ' '
|
||||||
const displayName = FORMATTER_DISPLAY_NAMES[result.format] || result.format
|
const displayName = FORMATTER_DISPLAY_NAMES[result.format] || result.format
|
||||||
return `${prefix}${displayName.padEnd(12)} ${bar} ${accuracyString} ${countString}`
|
return `${prefix}${displayName.padEnd(12)} ${bar} ${accuracyString} ${countString}`
|
||||||
}).join('\n')
|
}).join('\n')
|
||||||
|
|
||||||
// Add blank line before model name, except for first model
|
// Add blank line before model name, except for first model
|
||||||
|
|||||||
Reference in New Issue
Block a user