test: add benchmarks for compact vs. pretty JSON

This commit is contained in:
Johann Schopplich
2025-10-30 15:02:51 +01:00
parent df68417d8b
commit 2c4f3c4362
14 changed files with 283 additions and 267 deletions

View File

@@ -3,7 +3,7 @@ import * as path from 'node:path'
import * as prompts from '@clack/prompts'
import { encode } from '../../src/index'
import githubRepos from '../data/github-repos.json' with { type: 'json' }
import { BENCHMARKS_DIR, ROOT_DIR } from '../src/constants'
import { BENCHMARKS_DIR, FORMATTER_DISPLAY_NAMES, ROOT_DIR } from '../src/constants'
import { generateAnalyticsData, generateOrderData } from '../src/datasets'
import { formatters } from '../src/formatters'
import { createProgressBar, ensureDir, tokenize } from '../src/utils'
@@ -50,118 +50,102 @@ const BENCHMARK_EXAMPLES = [
// Print the banner for this benchmark run.
prompts.intro('Token Efficiency Benchmark')
// Calculate total savings
// Running token totals across all examples, one variable per hard-coded format.
let totalJsonTokens = 0
let totalToonTokens = 0
let totalXmlTokens = 0
let totalYamlTokens = 0
// One entry per benchmark example, appended at the end of each loop iteration.
const results: BenchmarkResult[] = []
// Running token totals across all examples, keyed by formatter name.
const totalTokensByFormat: Record<string, number> = {}
for (const example of BENCHMARK_EXAMPLES) {
const data = example.getData()
// Serialize the same data once per hard-coded format; JSON is pretty-printed with a 2-space indent.
const jsonString = JSON.stringify(data, undefined, 2)
const toonString = encode(data)
const xmlString = formatters.xml!(data)
const yamlString = formatters.yaml!(data)
// Calculate tokens for each format
// Per-example metrics, later stored on the result entry as `formats`.
const formatMetrics: FormatMetrics[] = []
// Token count of this example's data, keyed by formatter name.
const tokensByFormat: Record<string, number> = {}
const jsonTokens = tokenize(jsonString)
const toonTokens = tokenize(toonString)
const xmlTokens = tokenize(xmlString)
const yamlTokens = tokenize(yamlString)
// Run every entry of `formatters` over the data, record its token count for this
// example, and add it to the cross-example total for that format.
for (const [formatName, formatter] of Object.entries(formatters)) {
const formattedString = formatter(data)
const tokens = tokenize(formattedString)
tokensByFormat[formatName] = tokens
// `|| 0` seeds the total the first time a format name is seen.
totalTokensByFormat[formatName] = (totalTokensByFormat[formatName] || 0) + tokens
}
// Savings = tokens the other format needs beyond what TOON needs.
const jsonSavings = jsonTokens - toonTokens
const xmlSavings = xmlTokens - toonTokens
const yamlSavings = yamlTokens - toonTokens
totalJsonTokens += jsonTokens
totalToonTokens += toonTokens
totalXmlTokens += xmlTokens
totalYamlTokens += yamlTokens
// Calculate savings vs TOON
// NOTE(review): `toonTokens` is declared a second time here (see the `tokenize(toonString)`
// declaration above); this looks like both sides of a diff shown together — confirm which is current.
// The `!` assumes `formatters` contains a `toon` entry — not visible in this chunk; confirm.
const toonTokens = tokensByFormat.toon!
for (const [formatName, tokens] of Object.entries(tokensByFormat)) {
const savings = tokens - toonTokens
formatMetrics.push({
name: formatName,
tokens,
savings,
// Percentage is relative to the compared format's own token count, with one decimal;
// the TOON row is pinned to '0.0' rather than computed.
savingsPercent: formatName === 'toon' ? '0.0' : ((savings / tokens) * 100).toFixed(1),
})
}
results.push({
name: example.name,
emoji: example.emoji,
description: example.description,
data,
// Hand-written metrics list: TOON as the baseline, then JSON, XML and YAML.
formats: [
{
name: 'toon',
tokens: toonTokens,
savings: 0,
savingsPercent: '0.0',
},
{
name: 'json',
tokens: jsonTokens,
savings: jsonSavings,
savingsPercent: ((jsonSavings / jsonTokens) * 100).toFixed(1),
},
{
name: 'xml',
tokens: xmlTokens,
savings: xmlSavings,
savingsPercent: ((xmlSavings / xmlTokens) * 100).toFixed(1),
},
{
name: 'yaml',
tokens: yamlTokens,
savings: yamlSavings,
savingsPercent: ((yamlSavings / yamlTokens) * 100).toFixed(1),
},
],
// NOTE(review): second `formats` key in the same object literal; presumably the
// replacement for the hand-written list above — confirm.
formats: formatMetrics,
showDetailed: example.showDetailed,
})
}
// Cross-example savings per hard-coded format: absolute token difference vs TOON, and that
// difference as a percentage of the compared format's total (one decimal, as a string).
const totalJsonSavings = totalJsonTokens - totalToonTokens
const totalJsonSavingsPercent = ((totalJsonSavings / totalJsonTokens) * 100).toFixed(1)
const totalXmlSavings = totalXmlTokens - totalToonTokens
const totalXmlSavingsPercent = ((totalXmlSavings / totalXmlTokens) * 100).toFixed(1)
const totalYamlSavings = totalYamlTokens - totalToonTokens
const totalYamlSavingsPercent = ((totalYamlSavings / totalYamlTokens) * 100).toFixed(1)
// Calculate total savings percentages
// The `!` assumes a `toon` key was accumulated into `totalTokensByFormat` — confirm against `formatters`.
const totalToonTokens = totalTokensByFormat.toon!
// Savings percentage per format name, stored as a one-decimal string.
const totalSavingsPercent: Record<string, string> = {}
for (const [formatName, totalTokens] of Object.entries(totalTokensByFormat)) {
// TOON is the baseline, so its own entry is fixed at '0.0'.
if (formatName === 'toon') {
totalSavingsPercent[formatName] = '0.0'
}
else {
// Percentage is relative to the compared format's total, not to TOON's.
const savings = totalTokens - totalToonTokens
totalSavingsPercent[formatName] = ((savings / totalTokens) * 100).toFixed(1)
}
}
// Generate ASCII bar chart visualization (stacked compact format)
// Formats listed under each TOON row, in display order; names are looked up in `result.formats`.
const formatOrder = ['json-pretty', 'json-compact', 'yaml', 'xml']
// One text block per benchmark result: a headline row with the bar and TOON token count,
// followed by one comparison row per format. Blocks are separated by a blank line.
const datasetRows = results
.map((result) => {
// `find(...)!` assumes each named format is present in `result.formats`.
const toon = result.formats.find(f => f.name === 'toon')!
const json = result.formats.find(f => f.name === 'json')!
const xml = result.formats.find(f => f.name === 'xml')!
const yaml = result.formats.find(f => f.name === 'yaml')!
// NOTE(review): `percentage` is declared twice below, once from `json` and once from
// 'json-pretty'; this looks like both sides of a diff shown together — confirm which is current.
const percentage = Number.parseFloat(json.savingsPercent)
const percentage = Number.parseFloat(result.formats.find(f => f.name === 'json-pretty')!.savingsPercent)
const bar = createProgressBar(100 - percentage, 100) // Invert to show TOON tokens
// Token counts rendered with en-US digit grouping.
const toonStr = toon.tokens.toLocaleString('en-US')
const jsonStr = json.tokens.toLocaleString('en-US')
const xmlStr = xml.tokens.toLocaleString('en-US')
const yamlStr = yaml.tokens.toLocaleString('en-US')
// Headline row: emoji, name padded to 25 characters, bar, TOON count padded to 6 characters.
const line1 = `${result.emoji} ${result.name.padEnd(25)} ${bar} ${toonStr.padStart(6)} tokens`
const line2 = ` vs JSON: ${jsonStr.padStart(6)} (-${json.savingsPercent}%)`
const line3 = ` vs YAML: ${yamlStr.padStart(6)} (-${yaml.savingsPercent}%)`
const line4 = ` vs XML: ${xmlStr.padStart(6)} (-${xml.savingsPercent}%)`
const line1 = `${result.emoji} ${result.name.padEnd(25)} ${bar} ${toonStr.padStart(6)} tokens`
return `${line1}\n${line2}\n${line3}\n${line4}`
// Build one "vs <label> (-N%)   <tokens>" row per entry of `formatOrder`.
const comparisonLines = formatOrder.map((formatName) => {
const format = result.formats.find(f => f.name === formatName)!
// Use the display name when one is registered; otherwise fall back to the upper-cased key.
const label = FORMATTER_DISPLAY_NAMES[formatName] || formatName.toUpperCase()
// Label column is padded to 28 characters, token column to 6.
const labelWithSavings = `vs ${label} (-${format.savingsPercent}%)`.padEnd(28)
const tokenStr = format.tokens.toLocaleString('en-US').padStart(6)
return ` ${labelWithSavings}${tokenStr}`
})
return [line1, ...comparisonLines].join('\n')
})
.join('\n\n')
// Add separator and totals row
const separator = '─────────────────────────────────────────────────────────────────────'
// Calculate bar for totals (TOON vs average of JSON+YAML+XML)
// NOTE(review): `averageComparisonTokens` is declared twice below (fixed three-format mean and
// `formatOrder`-based mean); this looks like both sides of a diff shown together — confirm which is current.
const averageComparisonTokens = (totalJsonTokens + totalYamlTokens + totalXmlTokens) / 3
// Calculate bar for totals (TOON vs average of comparison formats)
// Totals of the formats in `formatOrder`; the `!` assumes each name has an accumulated total.
const comparisonTokens = formatOrder.map(name => totalTokensByFormat[name]!)
// Arithmetic mean of those totals.
const averageComparisonTokens = comparisonTokens.reduce((a, b) => a + b, 0) / comparisonTokens.length
// TOON's total expressed as a percentage of the average comparison total.
const totalPercentage = (totalToonTokens / averageComparisonTokens) * 100
const totalBar = createProgressBar(totalPercentage, 100)
// Headline totals row, followed by one row per compared format.
const totalLine1 = `Total ${totalBar} ${totalToonTokens.toLocaleString('en-US').padStart(6)} tokens`
const totalLine2 = ` vs JSON: ${totalJsonTokens.toLocaleString('en-US').padStart(6)} (-${totalJsonSavingsPercent}%)`
const totalLine3 = ` vs YAML: ${totalYamlTokens.toLocaleString('en-US').padStart(6)} (-${totalYamlSavingsPercent}%)`
const totalLine4 = ` vs XML: ${totalXmlTokens.toLocaleString('en-US').padStart(6)} (-${totalXmlSavingsPercent}%)`
const totalLine1 = `Total ${totalBar} ${totalToonTokens.toLocaleString('en-US').padStart(6)} tokens`
// Chart body, blank line, separator, then the totals rows.
const barChartSection = `${datasetRows}\n\n${separator}\n${totalLine1}\n${totalLine2}\n${totalLine3}\n${totalLine4}`
// Same row layout as the per-dataset comparison rows, but fed from the cross-example totals.
const totalComparisonLines = formatOrder.map((formatName) => {
// Use the display name when one is registered; otherwise fall back to the upper-cased key.
const label = FORMATTER_DISPLAY_NAMES[formatName] || formatName.toUpperCase()
const tokens = totalTokensByFormat[formatName]!
const percent = totalSavingsPercent[formatName]!
// Label column is padded to 28 characters, token column to 6.
const labelWithSavings = `vs ${label} (-${percent}%)`.padEnd(28)
const tokenStr = tokens.toLocaleString('en-US').padStart(6)
return ` ${labelWithSavings}${tokenStr}`
})
// Chart body, blank line, separator, then the totals headline and its comparison rows.
const barChartSection = `${datasetRows}\n\n${separator}\n${totalLine1}\n${totalComparisonLines.join('\n')}`
// Generate detailed examples (only for selected examples)
// Note: Large datasets are truncated for display readability in the report.
@@ -185,7 +169,7 @@ const detailedExamples = results
const separator = i < filtered.length - 1 ? '\n\n---' : ''
const json = result.formats.find(f => f.name === 'json')!
const json = result.formats.find(f => f.name === 'json-pretty')!
const toon = result.formats.find(f => f.name === 'toon')!
return `#### ${result.emoji} ${result.name}