docs(accuracy): highlight toon in perf table

This commit is contained in:
Johann Schopplich
2025-10-28 23:08:47 +01:00
parent ecf578a7dc
commit e757746351
2 changed files with 10 additions and 7 deletions

View File

@@ -4,7 +4,7 @@ Accuracy across **4 LLMs** on 154 data retrieval questions:
``` ```
gpt-5-nano gpt-5-nano
toon ███████████████████░ 96.1% (148/154) toon ███████████████████░ 96.1% (148/154)
csv ██████████████████░░ 90.3% (139/154) csv ██████████████████░░ 90.3% (139/154)
yaml ██████████████████░░ 89.0% (137/154) yaml ██████████████████░░ 89.0% (137/154)
json ██████████████████░░ 87.7% (135/154) json ██████████████████░░ 87.7% (135/154)
@@ -12,7 +12,7 @@ gpt-5-nano
claude-haiku-4-5-20251001 claude-haiku-4-5-20251001
yaml ██████████░░░░░░░░░░ 49.4% (76/154) yaml ██████████░░░░░░░░░░ 49.4% (76/154)
toon ██████████░░░░░░░░░░ 48.1% (74/154) toon ██████████░░░░░░░░░░ 48.1% (74/154)
csv ██████████░░░░░░░░░░ 48.1% (74/154) csv ██████████░░░░░░░░░░ 48.1% (74/154)
json █████████░░░░░░░░░░░ 47.4% (73/154) json █████████░░░░░░░░░░░ 47.4% (73/154)
xml █████████░░░░░░░░░░░ 46.8% (72/154) xml █████████░░░░░░░░░░░ 46.8% (72/154)
@@ -20,12 +20,12 @@ claude-haiku-4-5-20251001
gemini-2.5-flash gemini-2.5-flash
csv ██████████████████░░ 87.7% (135/154) csv ██████████████████░░ 87.7% (135/154)
xml █████████████████░░░ 85.1% (131/154) xml █████████████████░░░ 85.1% (131/154)
toon █████████████████░░░ 83.8% (129/154) toon █████████████████░░░ 83.8% (129/154)
json ████████████████░░░░ 78.6% (121/154) json ████████████████░░░░ 78.6% (121/154)
yaml ███████████████░░░░░ 76.6% (118/154) yaml ███████████████░░░░░ 76.6% (118/154)
grok-4-fast-non-reasoning grok-4-fast-non-reasoning
toon ██████████░░░░░░░░░░ 48.7% (75/154) toon ██████████░░░░░░░░░░ 48.7% (75/154)
json ██████████░░░░░░░░░░ 48.1% (74/154) json ██████████░░░░░░░░░░ 48.1% (74/154)
xml █████████░░░░░░░░░░░ 47.4% (73/154) xml █████████░░░░░░░░░░░ 47.4% (73/154)
yaml █████████░░░░░░░░░░░ 46.8% (72/154) yaml █████████░░░░░░░░░░░ 46.8% (72/154)

View File

@@ -73,7 +73,8 @@ export function generateMarkdownReport(
const bar = createProgressBar(result.accuracy, 1, 20) const bar = createProgressBar(result.accuracy, 1, 20)
const accuracyStr = `${(result.accuracy * 100).toFixed(1)}%`.padStart(6) const accuracyStr = `${(result.accuracy * 100).toFixed(1)}%`.padStart(6)
const countStr = `(${result.correctCount}/${result.totalCount})` const countStr = `(${result.correctCount}/${result.totalCount})`
return ` ${result.format.padEnd(12)} ${bar} ${accuracyStr} ${countStr}` const prefix = result.format === 'toon' ? '→ ' : ' '
return `${prefix}${result.format.padEnd(12)} ${bar} ${accuracyStr} ${countStr}`
}).join('\n') }).join('\n')
// Add blank line before model name, except for first model // Add blank line before model name, except for first model
@@ -134,7 +135,7 @@ export function generateMarkdownReport(
| ------ | -------- | ------ | ------------- | | ------ | -------- | ------ | ------------- |
${tableRows} ${tableRows}
`.trimStart() `.trimStart()
}).filter(Boolean).join('\n') }).filter(Boolean).join('\n').trim()
// Build performance by model // Build performance by model
const modelPerformance = modelNames.map((modelName) => { const modelPerformance = modelNames.map((modelName) => {
@@ -163,7 +164,7 @@ ${tableRows}
| ------ | -------- | ------------- | | ------ | -------- | ------------- |
${tableRows} ${tableRows}
`.trimStart() `.trimStart()
}).join('\n') }).join('\n').trim()
// Calculate total unique questions // Calculate total unique questions
const totalQuestions = [...new Set(results.map(r => r.questionId))].length const totalQuestions = [...new Set(results.map(r => r.questionId))].length
@@ -204,9 +205,11 @@ ${summaryComparison}
#### Performance by Dataset #### Performance by Dataset
${datasetBreakdown} ${datasetBreakdown}
#### Performance by Model #### Performance by Model
${modelPerformance} ${modelPerformance}
</details> </details>
<details> <details>