feat(cli): add --stats flag to show token savings (#51)

* feat(cli): add --stats flag to show token efficiency

- Add --stats boolean flag to display token count comparison
- Calculate approximate tokens using char length / 4 heuristic (superseded by tokenx in the follow-up change below)
- Show JSON vs TOON token counts with savings percentage
- Opt-in feature, default behavior unchanged

* feat: use tokenx for more accurate estimates

---------

Co-authored-by: Johann Schopplich <mail@johannschopplich.com>
This commit is contained in:
SangheeSon
2025-11-01 08:35:54 +09:00
committed by GitHub
parent af068f995d
commit 2b882870f7
4 changed files with 35 additions and 0 deletions

View File

@@ -499,11 +499,15 @@ toon input.json
| `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) | | `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) |
| `--indent <number>` | Indentation size (default: `2`) | | `--indent <number>` | Indentation size (default: `2`) |
| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) | | `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
| `--stats` | Show token count estimates and savings (encode only) |
| `--no-strict` | Disable strict validation when decoding | | `--no-strict` | Disable strict validation when decoding |
### Examples ### Examples
```bash ```bash
# Show token savings when encoding
toon data.json --stats -o output.toon
# Tab-separated output (often more token-efficient) # Tab-separated output (often more token-efficient)
toon data.json --delimiter "\t" -o output.toon toon data.json --delimiter "\t" -o output.toon

View File

@@ -8,5 +8,8 @@
"dependencies": { "dependencies": {
"citty": "^0.1.6", "citty": "^0.1.6",
"consola": "^3.4.2" "consola": "^3.4.2"
},
"devDependencies": {
"tokenx": "^1.2.0"
} }
} }

View File

@@ -4,6 +4,7 @@ import * as path from 'node:path'
import process from 'node:process' import process from 'node:process'
import { defineCommand, runMain } from 'citty' import { defineCommand, runMain } from 'citty'
import { consola } from 'consola' import { consola } from 'consola'
import { estimateTokenCount } from 'tokenx'
import { name, version } from '../../package.json' with { type: 'json' } import { name, version } from '../../package.json' with { type: 'json' }
import { decode, DEFAULT_DELIMITER, DELIMITERS, encode } from '../../src' import { decode, DEFAULT_DELIMITER, DELIMITERS, encode } from '../../src'
@@ -54,6 +55,11 @@ const main = defineCommand({
description: 'Enable strict mode for decoding', description: 'Enable strict mode for decoding',
default: true, default: true,
}, },
stats: {
type: 'boolean',
description: 'Show token statistics',
default: false,
},
}, },
async run({ args }) { async run({ args }) {
const input = args.input || args._[0] const input = args.input || args._[0]
@@ -86,6 +92,7 @@ const main = defineCommand({
delimiter: delimiter as Delimiter, delimiter: delimiter as Delimiter,
indent, indent,
lengthMarker: args.lengthMarker === true ? '#' : false, lengthMarker: args.lengthMarker === true ? '#' : false,
printStats: args.stats === true,
}) })
} }
else { else {
@@ -131,6 +138,7 @@ async function encodeToToon(config: {
delimiter: Delimiter delimiter: Delimiter
indent: number indent: number
lengthMarker: NonNullable<EncodeOptions['lengthMarker']> lengthMarker: NonNullable<EncodeOptions['lengthMarker']>
printStats: boolean
}) { }) {
const jsonContent = await fsp.readFile(config.input, 'utf-8') const jsonContent = await fsp.readFile(config.input, 'utf-8')
@@ -159,6 +167,17 @@ async function encodeToToon(config: {
else { else {
console.log(toonOutput) console.log(toonOutput)
} }
if (config.printStats) {
const jsonTokens = estimateTokenCount(jsonContent)
const toonTokens = estimateTokenCount(toonOutput)
const diff = jsonTokens - toonTokens
const percent = ((diff / jsonTokens) * 100).toFixed(1)
console.log()
consola.info(`Token estimates: ~${jsonTokens} (JSON) → ~${toonTokens} (TOON)`)
consola.success(`Saved ~${diff} tokens (-${percent}%)`)
}
} }
async function decodeToJson(config: { async function decodeToJson(config: {

9
pnpm-lock.yaml generated
View File

@@ -95,6 +95,10 @@ importers:
consola: consola:
specifier: ^3.4.2 specifier: ^3.4.2
version: 3.4.2 version: 3.4.2
devDependencies:
tokenx:
specifier: ^1.2.0
version: 1.2.0
packages: packages:
@@ -2200,6 +2204,9 @@ packages:
resolution: {integrity: sha512-41wJyvKep3yT2tyPqX/4blcfybknGB4D+oETKLs7Q76UiPqRpUJK3hr1nxelyYO0PHKVzJwlu0aCeEAsGI6rpw==} resolution: {integrity: sha512-41wJyvKep3yT2tyPqX/4blcfybknGB4D+oETKLs7Q76UiPqRpUJK3hr1nxelyYO0PHKVzJwlu0aCeEAsGI6rpw==}
engines: {node: '>=20'} engines: {node: '>=20'}
tokenx@1.2.0:
resolution: {integrity: sha512-x4bRrL23b22H+EqW2pbhIkkt3ouj27ZGmAS1QoIqpocEO4m0sAl2H1M4L1UzKqleikY4U9lz/TbEw4jeG8tm2A==}
toml-eslint-parser@0.10.0: toml-eslint-parser@0.10.0:
resolution: {integrity: sha512-khrZo4buq4qVmsGzS5yQjKe/WsFvV8fGfOjDQN0q4iy9FjRfPWRgTFrU8u1R2iu/SfWLhY9WnCi4Jhdrcbtg+g==} resolution: {integrity: sha512-khrZo4buq4qVmsGzS5yQjKe/WsFvV8fGfOjDQN0q4iy9FjRfPWRgTFrU8u1R2iu/SfWLhY9WnCi4Jhdrcbtg+g==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
@@ -4706,6 +4713,8 @@ snapshots:
'@sindresorhus/base62': 1.0.0 '@sindresorhus/base62': 1.0.0
reserved-identifiers: 1.2.0 reserved-identifiers: 1.2.0
tokenx@1.2.0: {}
toml-eslint-parser@0.10.0: toml-eslint-parser@0.10.0:
dependencies: dependencies:
eslint-visitor-keys: 3.4.3 eslint-visitor-keys: 3.4.3