From fb43bdf527a93a0e11a992987e8bc0a0435d17f7 Mon Sep 17 00:00:00 2001 From: Johann Schopplich Date: Thu, 30 Oct 2025 15:19:16 +0100 Subject: [PATCH] docs: adjust padding for benchmark comparison --- README.md | 80 +++++++++---------- benchmarks/results/retrieval-accuracy.md | 48 +++++------ benchmarks/results/token-efficiency.md | 32 ++++---- .../scripts/token-efficiency-benchmark.ts | 4 +- benchmarks/src/report.ts | 2 +- 5 files changed, 83 insertions(+), 83 deletions(-) diff --git a/README.md b/README.md index 7937585..4a8b4d3 100644 --- a/README.md +++ b/README.md @@ -58,29 +58,29 @@ The benchmarks test datasets that favor TOON's strengths (uniform tabular data). ``` ⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens - vs JSON (-42.3%) 15,145 - vs JSON compact (-23.7%) 11,455 - vs YAML (-33.4%) 13,129 - vs XML (-48.8%) 17,095 + vs JSON (-42.3%) 15,145 + vs JSON compact (-23.7%) 11,455 + vs YAML (-33.4%) 13,129 + vs XML (-48.8%) 17,095 📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens - vs JSON (-58.9%) 10,977 - vs JSON compact (-35.7%) 7,013 - vs YAML (-48.8%) 8,810 - vs XML (-65.7%) 13,128 + vs JSON (-58.9%) 10,977 + vs JSON compact (-35.7%) 7,013 + vs YAML (-48.8%) 8,810 + vs XML (-65.7%) 13,128 🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens - vs JSON (-35.4%) 257 - vs JSON compact (-2.9%) 171 - vs YAML (-15.7%) 197 - vs XML (-38.7%) 271 + vs JSON (-35.4%) 257 + vs JSON compact (-2.9%) 171 + vs YAML (-15.7%) 197 + vs XML (-38.7%) 271 ───────────────────────────────────────────────────────────────────── Total ██████████████░░░░░░░░░░░ 13,418 tokens - vs JSON (-49.1%) 26,379 - vs JSON compact (-28.0%) 18,639 - vs YAML (-39.4%) 22,136 - vs XML (-56.0%) 30,494 + vs JSON (-49.1%) 26,379 + vs JSON compact (-28.0%) 18,639 + vs YAML (-39.4%) 22,136 + vs XML (-56.0%) 30,494 ```
@@ -232,36 +232,36 @@ Accuracy across **4 LLMs** on 154 data retrieval questions: ``` gpt-5-nano -→ TOON ███████████████████░ 96.1% (148/154) - CSV ██████████████████░░ 91.6% (141/154) - YAML ██████████████████░░ 91.6% (141/154) - JSON compact ██████████████████░░ 91.6% (141/154) - XML █████████████████░░░ 87.0% (134/154) - JSON █████████████████░░░ 86.4% (133/154) +→ TOON ███████████████████░ 96.1% (148/154) + CSV ██████████████████░░ 91.6% (141/154) + YAML ██████████████████░░ 91.6% (141/154) + JSON compact ██████████████████░░ 91.6% (141/154) + XML █████████████████░░░ 87.0% (134/154) + JSON █████████████████░░░ 86.4% (133/154) claude-haiku-4-5-20251001 - JSON ██████████░░░░░░░░░░ 50.0% (77/154) - YAML ██████████░░░░░░░░░░ 49.4% (76/154) -→ TOON ██████████░░░░░░░░░░ 48.7% (75/154) - XML ██████████░░░░░░░░░░ 48.1% (74/154) - CSV █████████░░░░░░░░░░░ 47.4% (73/154) - JSON compact █████████░░░░░░░░░░░ 44.2% (68/154) + JSON ██████████░░░░░░░░░░ 50.0% (77/154) + YAML ██████████░░░░░░░░░░ 49.4% (76/154) +→ TOON ██████████░░░░░░░░░░ 48.7% (75/154) + XML ██████████░░░░░░░░░░ 48.1% (74/154) + CSV █████████░░░░░░░░░░░ 47.4% (73/154) + JSON compact █████████░░░░░░░░░░░ 44.2% (68/154) gemini-2.5-flash - CSV ██████████████████░░ 87.7% (135/154) - XML ██████████████████░░ 87.7% (135/154) -→ TOON █████████████████░░░ 86.4% (133/154) - YAML ████████████████░░░░ 79.9% (123/154) - JSON compact ████████████████░░░░ 79.9% (123/154) - JSON ███████████████░░░░░ 76.6% (118/154) + CSV ██████████████████░░ 87.7% (135/154) + XML ██████████████████░░ 87.7% (135/154) +→ TOON █████████████████░░░ 86.4% (133/154) + YAML ████████████████░░░░ 79.9% (123/154) + JSON compact ████████████████░░░░ 79.9% (123/154) + JSON ███████████████░░░░░ 76.6% (118/154) grok-4-fast-non-reasoning -→ TOON ██████████░░░░░░░░░░ 49.4% (76/154) - JSON ██████████░░░░░░░░░░ 48.7% (75/154) - XML █████████░░░░░░░░░░░ 46.1% (71/154) - YAML █████████░░░░░░░░░░░ 46.1% (71/154) - JSON compact █████████░░░░░░░░░░░ 45.5% (70/154) - CSV █████████░░░░░░░░░░░ 44.2% (68/154) +→ TOON ██████████░░░░░░░░░░ 49.4% (76/154) + JSON ██████████░░░░░░░░░░ 48.7% (75/154) + XML █████████░░░░░░░░░░░ 46.1% (71/154) + YAML █████████░░░░░░░░░░░ 46.1% (71/154) + JSON compact █████████░░░░░░░░░░░ 45.5% (70/154) + CSV █████████░░░░░░░░░░░ 44.2% (68/154) ``` **Key tradeoff:** TOON achieves **70.1% accuracy** (vs JSON's 65.4%) while using **46.3% fewer tokens** on these datasets. diff --git a/benchmarks/results/retrieval-accuracy.md b/benchmarks/results/retrieval-accuracy.md index 1a2c137..2901112 100644 --- a/benchmarks/results/retrieval-accuracy.md +++ b/benchmarks/results/retrieval-accuracy.md @@ -4,36 +4,36 @@ Accuracy across **4 LLMs** on 154 data retrieval questions: ``` gpt-5-nano -→ TOON ███████████████████░ 96.1% (148/154) - CSV ██████████████████░░ 91.6% (141/154) - YAML ██████████████████░░ 91.6% (141/154) - JSON compact ██████████████████░░ 91.6% (141/154) - XML █████████████████░░░ 87.0% (134/154) - JSON █████████████████░░░ 86.4% (133/154) +→ TOON ███████████████████░ 96.1% (148/154) + CSV ██████████████████░░ 91.6% (141/154) + YAML ██████████████████░░ 91.6% (141/154) + JSON compact ██████████████████░░ 91.6% (141/154) + XML █████████████████░░░ 87.0% (134/154) + JSON █████████████████░░░ 86.4% (133/154) claude-haiku-4-5-20251001 - JSON ██████████░░░░░░░░░░ 50.0% (77/154) - YAML ██████████░░░░░░░░░░ 49.4% (76/154) -→ TOON ██████████░░░░░░░░░░ 48.7% (75/154) - XML ██████████░░░░░░░░░░ 48.1% (74/154) - CSV █████████░░░░░░░░░░░ 47.4% (73/154) - JSON compact █████████░░░░░░░░░░░ 44.2% (68/154) + JSON ██████████░░░░░░░░░░ 50.0% (77/154) + YAML ██████████░░░░░░░░░░ 49.4% (76/154) +→ TOON ██████████░░░░░░░░░░ 48.7% (75/154) + XML ██████████░░░░░░░░░░ 48.1% (74/154) + CSV █████████░░░░░░░░░░░ 47.4% (73/154) + JSON compact █████████░░░░░░░░░░░ 44.2% (68/154) gemini-2.5-flash - CSV ██████████████████░░ 87.7% (135/154) - XML ██████████████████░░ 87.7% (135/154) -→ TOON █████████████████░░░ 86.4% (133/154) - YAML ████████████████░░░░ 79.9% (123/154) - JSON compact ████████████████░░░░ 79.9% (123/154) - JSON ███████████████░░░░░ 76.6% (118/154) + CSV ██████████████████░░ 87.7% (135/154) + XML ██████████████████░░ 87.7% (135/154) +→ TOON █████████████████░░░ 86.4% (133/154) + YAML ████████████████░░░░ 79.9% (123/154) + JSON compact ████████████████░░░░ 79.9% (123/154) + JSON ███████████████░░░░░ 76.6% (118/154) grok-4-fast-non-reasoning -→ TOON ██████████░░░░░░░░░░ 49.4% (76/154) - JSON ██████████░░░░░░░░░░ 48.7% (75/154) - XML █████████░░░░░░░░░░░ 46.1% (71/154) - YAML █████████░░░░░░░░░░░ 46.1% (71/154) - JSON compact █████████░░░░░░░░░░░ 45.5% (70/154) - CSV █████████░░░░░░░░░░░ 44.2% (68/154) +→ TOON ██████████░░░░░░░░░░ 49.4% (76/154) + JSON ██████████░░░░░░░░░░ 48.7% (75/154) + XML █████████░░░░░░░░░░░ 46.1% (71/154) + YAML █████████░░░░░░░░░░░ 46.1% (71/154) + JSON compact █████████░░░░░░░░░░░ 45.5% (70/154) + CSV █████████░░░░░░░░░░░ 44.2% (68/154) ``` **Key tradeoff:** TOON achieves **70.1% accuracy** (vs JSON's 65.4%) while using **46.3% fewer tokens** on these datasets. diff --git a/benchmarks/results/token-efficiency.md b/benchmarks/results/token-efficiency.md index 5d68d97..a00bbb7 100644 --- a/benchmarks/results/token-efficiency.md +++ b/benchmarks/results/token-efficiency.md @@ -2,29 +2,29 @@ ``` ⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens - vs JSON (-42.3%) 15,145 - vs JSON compact (-23.7%) 11,455 - vs YAML (-33.4%) 13,129 - vs XML (-48.8%) 17,095 + vs JSON (-42.3%) 15,145 + vs JSON compact (-23.7%) 11,455 + vs YAML (-33.4%) 13,129 + vs XML (-48.8%) 17,095 📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens - vs JSON (-58.9%) 10,977 - vs JSON compact (-35.7%) 7,013 - vs YAML (-48.8%) 8,810 - vs XML (-65.7%) 13,128 + vs JSON (-58.9%) 10,977 + vs JSON compact (-35.7%) 7,013 + vs YAML (-48.8%) 8,810 + vs XML (-65.7%) 13,128 🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens - vs JSON (-35.4%) 257 - vs JSON compact (-2.9%) 171 - vs YAML (-15.7%) 197 - vs XML (-38.7%) 271 + vs JSON (-35.4%) 257 + vs JSON compact (-2.9%) 171 + vs YAML (-15.7%) 197 + vs XML (-38.7%) 271 ───────────────────────────────────────────────────────────────────── Total ██████████████░░░░░░░░░░░ 13,418 tokens - vs JSON (-49.1%) 26,379 - vs JSON compact (-28.0%) 18,639 - vs YAML (-39.4%) 22,136 - vs XML (-56.0%) 30,494 + vs JSON (-49.1%) 26,379 + vs JSON compact (-28.0%) 18,639 + vs YAML (-39.4%) 22,136 + vs XML (-56.0%) 30,494 ```
diff --git a/benchmarks/scripts/token-efficiency-benchmark.ts b/benchmarks/scripts/token-efficiency-benchmark.ts index e903852..0aaca31 100644 --- a/benchmarks/scripts/token-efficiency-benchmark.ts +++ b/benchmarks/scripts/token-efficiency-benchmark.ts @@ -116,7 +116,7 @@ const datasetRows = results const comparisonLines = formatOrder.map((formatName) => { const format = result.formats.find(f => f.name === formatName)! const label = FORMATTER_DISPLAY_NAMES[formatName] || formatName.toUpperCase() - const labelWithSavings = `vs ${label} (-${format.savingsPercent}%)`.padEnd(28) + const labelWithSavings = `vs ${label} (-${format.savingsPercent}%)`.padEnd(27) const tokenStr = format.tokens.toLocaleString('en-US').padStart(6) return ` ${labelWithSavings}${tokenStr}` }) @@ -140,7 +140,7 @@ const totalComparisonLines = formatOrder.map((formatName) => { const label = FORMATTER_DISPLAY_NAMES[formatName] || formatName.toUpperCase() const tokens = totalTokensByFormat[formatName]! const percent = totalSavingsPercent[formatName]! - const labelWithSavings = `vs ${label} (-${percent}%)`.padEnd(28) + const labelWithSavings = `vs ${label} (-${percent}%)`.padEnd(27) const tokenStr = tokens.toLocaleString('en-US').padStart(6) return ` ${labelWithSavings}${tokenStr}` }) diff --git a/benchmarks/src/report.ts b/benchmarks/src/report.ts index d3ba9b0..41713c7 100644 --- a/benchmarks/src/report.ts +++ b/benchmarks/src/report.ts @@ -75,7 +75,7 @@ export function generateMarkdownReport( const countString = `(${result.correctCount}/${result.totalCount})` const prefix = result.format === 'toon' ? '→ ' : ' ' const displayName = FORMATTER_DISPLAY_NAMES[result.format] || result.format - return `${prefix}${displayName.padEnd(12)} ${bar} ${accuracyString} ${countString}` + return `${prefix}${displayName.padEnd(12)} ${bar} ${accuracyString} ${countString}` }).join('\n') // Add blank line before model name, except for first model