From e757746351018df9610386181c9e1638f7ef2eb1 Mon Sep 17 00:00:00 2001 From: Johann Schopplich Date: Tue, 28 Oct 2025 23:08:47 +0100 Subject: [PATCH] docs(accuracy): highlight toon in perf table --- benchmarks/results/retrieval-accuracy.md | 8 ++++---- benchmarks/src/report.ts | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/benchmarks/results/retrieval-accuracy.md b/benchmarks/results/retrieval-accuracy.md index 49e5bb5..2e1cb37 100644 --- a/benchmarks/results/retrieval-accuracy.md +++ b/benchmarks/results/retrieval-accuracy.md @@ -4,7 +4,7 @@ Accuracy across **4 LLMs** on 154 data retrieval questions: ``` gpt-5-nano - toon ███████████████████░ 96.1% (148/154) +→ toon ███████████████████░ 96.1% (148/154) csv ██████████████████░░ 90.3% (139/154) yaml ██████████████████░░ 89.0% (137/154) json ██████████████████░░ 87.7% (135/154) @@ -12,7 +12,7 @@ gpt-5-nano claude-haiku-4-5-20251001 yaml ██████████░░░░░░░░░░ 49.4% (76/154) - toon ██████████░░░░░░░░░░ 48.1% (74/154) +→ toon ██████████░░░░░░░░░░ 48.1% (74/154) csv ██████████░░░░░░░░░░ 48.1% (74/154) json █████████░░░░░░░░░░░ 47.4% (73/154) xml █████████░░░░░░░░░░░ 46.8% (72/154) @@ -20,12 +20,12 @@ claude-haiku-4-5-20251001 gemini-2.5-flash csv ██████████████████░░ 87.7% (135/154) xml █████████████████░░░ 85.1% (131/154) - toon █████████████████░░░ 83.8% (129/154) +→ toon █████████████████░░░ 83.8% (129/154) json ████████████████░░░░ 78.6% (121/154) yaml ███████████████░░░░░ 76.6% (118/154) grok-4-fast-non-reasoning - toon ██████████░░░░░░░░░░ 48.7% (75/154) +→ toon ██████████░░░░░░░░░░ 48.7% (75/154) json ██████████░░░░░░░░░░ 48.1% (74/154) xml █████████░░░░░░░░░░░ 47.4% (73/154) yaml █████████░░░░░░░░░░░ 46.8% (72/154) diff --git a/benchmarks/src/report.ts b/benchmarks/src/report.ts index 90f85a1..df12691 100644 --- a/benchmarks/src/report.ts +++ b/benchmarks/src/report.ts @@ -73,7 +73,8 @@ export function generateMarkdownReport( const bar = createProgressBar(result.accuracy, 1, 20) const accuracyStr = `${(result.accuracy * 100).toFixed(1)}%`.padStart(6) const countStr = `(${result.correctCount}/${result.totalCount})` - return ` ${result.format.padEnd(12)} ${bar} ${accuracyStr} ${countStr}` + const prefix = result.format === 'toon' ? '→ ' : ' ' + return `${prefix}${result.format.padEnd(12)} ${bar} ${accuracyStr} ${countStr}` }).join('\n') // Add blank line before model name, except for first model @@ -134,7 +135,7 @@ export function generateMarkdownReport( | ------ | -------- | ------ | ------------- | ${tableRows} `.trimStart() - }).filter(Boolean).join('\n') + }).filter(Boolean).join('\n').trim() // Build performance by model const modelPerformance = modelNames.map((modelName) => { @@ -163,7 +164,7 @@ ${tableRows} | ------ | -------- | ------------- | ${tableRows} `.trimStart() - }).join('\n') + }).join('\n').trim() // Calculate total unique questions const totalQuestions = [...new Set(results.map(r => r.questionId))].length @@ -204,9 +205,11 @@ ${summaryComparison} #### Performance by Dataset ${datasetBreakdown} + #### Performance by Model ${modelPerformance} +