feat(cli): add --stats flag to show token savings (#51)

* feat(cli): add --stats flag to show token efficiency

- Add --stats boolean flag to display token count comparison
- Calculate approximate tokens using char length / 4 heuristic (initial approach; replaced by tokenx in the follow-up commit below)
- Show JSON vs TOON token counts with savings percentage
- Opt-in feature, default behavior unchanged

* feat: use tokenx for more accurate estimates

---------

Co-authored-by: Johann Schopplich <mail@johannschopplich.com>
This commit is contained in:
SangheeSon
2025-11-01 08:35:54 +09:00
committed by GitHub
parent af068f995d
commit 2b882870f7
4 changed files with 35 additions and 0 deletions

View File

@@ -499,11 +499,15 @@ toon input.json
| `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) |
| `--indent <number>` | Indentation size (default: `2`) |
| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
| `--stats` | Show token count estimates and savings (encode only) |
| `--no-strict` | Disable strict validation when decoding |
### Examples
```bash
# Show token savings when encoding
toon data.json --stats -o output.toon
# Tab-separated output (often more token-efficient)
toon data.json --delimiter "\t" -o output.toon

View File

@@ -8,5 +8,8 @@
"dependencies": {
"citty": "^0.1.6",
"consola": "^3.4.2"
},
"devDependencies": {
"tokenx": "^1.2.0"
}
}

View File

@@ -4,6 +4,7 @@ import * as path from 'node:path'
import process from 'node:process'
import { defineCommand, runMain } from 'citty'
import { consola } from 'consola'
import { estimateTokenCount } from 'tokenx'
import { name, version } from '../../package.json' with { type: 'json' }
import { decode, DEFAULT_DELIMITER, DELIMITERS, encode } from '../../src'
@@ -54,6 +55,11 @@ const main = defineCommand({
description: 'Enable strict mode for decoding',
default: true,
},
stats: {
type: 'boolean',
description: 'Show token statistics',
default: false,
},
},
async run({ args }) {
const input = args.input || args._[0]
@@ -86,6 +92,7 @@ const main = defineCommand({
delimiter: delimiter as Delimiter,
indent,
lengthMarker: args.lengthMarker === true ? '#' : false,
printStats: args.stats === true,
})
}
else {
@@ -131,6 +138,7 @@ async function encodeToToon(config: {
delimiter: Delimiter
indent: number
lengthMarker: NonNullable<EncodeOptions['lengthMarker']>
printStats: boolean
}) {
const jsonContent = await fsp.readFile(config.input, 'utf-8')
@@ -159,6 +167,17 @@ async function encodeToToon(config: {
else {
console.log(toonOutput)
}
// Optionally report estimated token counts for the JSON input vs. the TOON output.
if (config.printStats) {
  const jsonTokens = estimateTokenCount(jsonContent)
  const toonTokens = estimateTokenCount(toonOutput)
  const diff = jsonTokens - toonTokens
  // Use the magnitude for the percentage (the sign is written explicitly in the
  // messages below) and guard against a zero JSON estimate so it never renders as NaN.
  const percent = jsonTokens > 0
    ? ((Math.abs(diff) / jsonTokens) * 100).toFixed(1)
    : '0.0'
  console.log()
  consola.info(`Token estimates: ~${jsonTokens} (JSON) → ~${toonTokens} (TOON)`)
  if (diff >= 0) {
    consola.success(`Saved ~${diff} tokens (-${percent}%)`)
  }
  else {
    // TOON came out larger than the JSON input: report it as an increase rather
    // than the misleading "Saved ~-N tokens (--X%)".
    consola.warn(`TOON uses ~${-diff} more tokens (+${percent}%)`)
  }
}
}
async function decodeToJson(config: {

9
pnpm-lock.yaml generated
View File

@@ -95,6 +95,10 @@ importers:
consola:
specifier: ^3.4.2
version: 3.4.2
devDependencies:
tokenx:
specifier: ^1.2.0
version: 1.2.0
packages:
@@ -2200,6 +2204,9 @@ packages:
resolution: {integrity: sha512-41wJyvKep3yT2tyPqX/4blcfybknGB4D+oETKLs7Q76UiPqRpUJK3hr1nxelyYO0PHKVzJwlu0aCeEAsGI6rpw==}
engines: {node: '>=20'}
tokenx@1.2.0:
resolution: {integrity: sha512-x4bRrL23b22H+EqW2pbhIkkt3ouj27ZGmAS1QoIqpocEO4m0sAl2H1M4L1UzKqleikY4U9lz/TbEw4jeG8tm2A==}
toml-eslint-parser@0.10.0:
resolution: {integrity: sha512-khrZo4buq4qVmsGzS5yQjKe/WsFvV8fGfOjDQN0q4iy9FjRfPWRgTFrU8u1R2iu/SfWLhY9WnCi4Jhdrcbtg+g==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
@@ -4706,6 +4713,8 @@ snapshots:
'@sindresorhus/base62': 1.0.0
reserved-identifiers: 1.2.0
tokenx@1.2.0: {}
toml-eslint-parser@0.10.0:
dependencies:
eslint-visitor-keys: 3.4.3