mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
docs(accuracy): highlight toon in perf table
This commit is contained in:
@@ -4,7 +4,7 @@ Accuracy across **4 LLMs** on 154 data retrieval questions:
|
|||||||
|
|
||||||
```
|
```
|
||||||
gpt-5-nano
|
gpt-5-nano
|
||||||
toon ███████████████████░ 96.1% (148/154)
|
→ toon ███████████████████░ 96.1% (148/154)
|
||||||
csv ██████████████████░░ 90.3% (139/154)
|
csv ██████████████████░░ 90.3% (139/154)
|
||||||
yaml ██████████████████░░ 89.0% (137/154)
|
yaml ██████████████████░░ 89.0% (137/154)
|
||||||
json ██████████████████░░ 87.7% (135/154)
|
json ██████████████████░░ 87.7% (135/154)
|
||||||
@@ -12,7 +12,7 @@ gpt-5-nano
|
|||||||
|
|
||||||
claude-haiku-4-5-20251001
|
claude-haiku-4-5-20251001
|
||||||
yaml ██████████░░░░░░░░░░ 49.4% (76/154)
|
yaml ██████████░░░░░░░░░░ 49.4% (76/154)
|
||||||
toon ██████████░░░░░░░░░░ 48.1% (74/154)
|
→ toon ██████████░░░░░░░░░░ 48.1% (74/154)
|
||||||
csv ██████████░░░░░░░░░░ 48.1% (74/154)
|
csv ██████████░░░░░░░░░░ 48.1% (74/154)
|
||||||
json █████████░░░░░░░░░░░ 47.4% (73/154)
|
json █████████░░░░░░░░░░░ 47.4% (73/154)
|
||||||
xml █████████░░░░░░░░░░░ 46.8% (72/154)
|
xml █████████░░░░░░░░░░░ 46.8% (72/154)
|
||||||
@@ -20,12 +20,12 @@ claude-haiku-4-5-20251001
|
|||||||
gemini-2.5-flash
|
gemini-2.5-flash
|
||||||
csv ██████████████████░░ 87.7% (135/154)
|
csv ██████████████████░░ 87.7% (135/154)
|
||||||
xml █████████████████░░░ 85.1% (131/154)
|
xml █████████████████░░░ 85.1% (131/154)
|
||||||
toon █████████████████░░░ 83.8% (129/154)
|
→ toon █████████████████░░░ 83.8% (129/154)
|
||||||
json ████████████████░░░░ 78.6% (121/154)
|
json ████████████████░░░░ 78.6% (121/154)
|
||||||
yaml ███████████████░░░░░ 76.6% (118/154)
|
yaml ███████████████░░░░░ 76.6% (118/154)
|
||||||
|
|
||||||
grok-4-fast-non-reasoning
|
grok-4-fast-non-reasoning
|
||||||
toon ██████████░░░░░░░░░░ 48.7% (75/154)
|
→ toon ██████████░░░░░░░░░░ 48.7% (75/154)
|
||||||
json ██████████░░░░░░░░░░ 48.1% (74/154)
|
json ██████████░░░░░░░░░░ 48.1% (74/154)
|
||||||
xml █████████░░░░░░░░░░░ 47.4% (73/154)
|
xml █████████░░░░░░░░░░░ 47.4% (73/154)
|
||||||
yaml █████████░░░░░░░░░░░ 46.8% (72/154)
|
yaml █████████░░░░░░░░░░░ 46.8% (72/154)
|
||||||
|
|||||||
@@ -73,7 +73,8 @@ export function generateMarkdownReport(
|
|||||||
const bar = createProgressBar(result.accuracy, 1, 20)
|
const bar = createProgressBar(result.accuracy, 1, 20)
|
||||||
const accuracyStr = `${(result.accuracy * 100).toFixed(1)}%`.padStart(6)
|
const accuracyStr = `${(result.accuracy * 100).toFixed(1)}%`.padStart(6)
|
||||||
const countStr = `(${result.correctCount}/${result.totalCount})`
|
const countStr = `(${result.correctCount}/${result.totalCount})`
|
||||||
return ` ${result.format.padEnd(12)} ${bar} ${accuracyStr} ${countStr}`
|
const prefix = result.format === 'toon' ? '→ ' : ' '
|
||||||
|
return `${prefix}${result.format.padEnd(12)} ${bar} ${accuracyStr} ${countStr}`
|
||||||
}).join('\n')
|
}).join('\n')
|
||||||
|
|
||||||
// Add blank line before model name, except for first model
|
// Add blank line before model name, except for first model
|
||||||
@@ -134,7 +135,7 @@ export function generateMarkdownReport(
|
|||||||
| ------ | -------- | ------ | ------------- |
|
| ------ | -------- | ------ | ------------- |
|
||||||
${tableRows}
|
${tableRows}
|
||||||
`.trimStart()
|
`.trimStart()
|
||||||
}).filter(Boolean).join('\n')
|
}).filter(Boolean).join('\n').trim()
|
||||||
|
|
||||||
// Build performance by model
|
// Build performance by model
|
||||||
const modelPerformance = modelNames.map((modelName) => {
|
const modelPerformance = modelNames.map((modelName) => {
|
||||||
@@ -163,7 +164,7 @@ ${tableRows}
|
|||||||
| ------ | -------- | ------------- |
|
| ------ | -------- | ------------- |
|
||||||
${tableRows}
|
${tableRows}
|
||||||
`.trimStart()
|
`.trimStart()
|
||||||
}).join('\n')
|
}).join('\n').trim()
|
||||||
|
|
||||||
// Calculate total unique questions
|
// Calculate total unique questions
|
||||||
const totalQuestions = [...new Set(results.map(r => r.questionId))].length
|
const totalQuestions = [...new Set(results.map(r => r.questionId))].length
|
||||||
@@ -204,9 +205,11 @@ ${summaryComparison}
|
|||||||
#### Performance by Dataset
|
#### Performance by Dataset
|
||||||
|
|
||||||
${datasetBreakdown}
|
${datasetBreakdown}
|
||||||
|
|
||||||
#### Performance by Model
|
#### Performance by Model
|
||||||
|
|
||||||
${modelPerformance}
|
${modelPerformance}
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
|
|||||||
Reference in New Issue
Block a user