feat(cli): memory-efficient streaming for encoding

This commit is contained in:
Johann Schopplich
2025-11-21 14:02:22 +01:00
parent be8bcfe9b2
commit 1c003c6118
7 changed files with 308 additions and 50 deletions

View File

@@ -1,3 +1,4 @@
import type { FileHandle } from 'node:fs/promises'
import type { DecodeOptions, EncodeOptions } from '../../toon/src'
import type { InputSource } from './types'
import * as fsp from 'node:fs/promises'
@@ -34,38 +35,42 @@ export async function encodeToToon(config: {
flattenDepth: config.flattenDepth,
}
let toonOutput: string
// When printing stats, we need the full string for token counting
if (config.printStats) {
toonOutput = encode(data, encodeOptions)
}
else {
// Use streaming encoder for non-stats path
const lines = Array.from(encodeLines(data, encodeOptions))
toonOutput = lines.join('\n')
}
const toonOutput = encode(data, encodeOptions)
if (config.output) {
await fsp.writeFile(config.output, toonOutput, 'utf-8')
const relativeInputPath = formatInputLabel(config.input)
const relativeOutputPath = path.relative(process.cwd(), config.output)
consola.success(`Encoded \`${relativeInputPath}\`\`${relativeOutputPath}\``)
}
else {
console.log(toonOutput)
}
if (config.output) {
await fsp.writeFile(config.output, toonOutput, 'utf-8')
}
else {
console.log(toonOutput)
}
if (config.printStats) {
const jsonTokens = estimateTokenCount(jsonContent)
const toonTokens = estimateTokenCount(toonOutput)
const diff = jsonTokens - toonTokens
const percent = ((diff / jsonTokens) * 100).toFixed(1)
if (config.output) {
const relativeInputPath = formatInputLabel(config.input)
const relativeOutputPath = path.relative(process.cwd(), config.output)
consola.success(`Encoded \`${relativeInputPath}\`\`${relativeOutputPath}\``)
}
console.log()
consola.info(`Token estimates: ~${jsonTokens} (JSON) → ~${toonTokens} (TOON)`)
consola.success(`Saved ~${diff} tokens (-${percent}%)`)
}
else {
// Use streaming encoder for memory-efficient output
await writeStreamingToon(encodeLines(data, encodeOptions), config.output)
if (config.output) {
const relativeInputPath = formatInputLabel(config.input)
const relativeOutputPath = path.relative(process.cwd(), config.output)
consola.success(`Encoded \`${relativeInputPath}\`\`${relativeOutputPath}\``)
}
}
}
export async function decodeToJson(config: {
@@ -102,3 +107,50 @@ export async function decodeToJson(config: {
console.log(jsonOutput)
}
}
/**
* Writes TOON lines to a file or stdout using streaming approach.
* Lines are written one at a time without building the full string in memory.
*
* @param lines - Iterable of TOON lines (without trailing newlines)
* @param outputPath - File path to write to, or undefined for stdout
*/
async function writeStreamingToon(
lines: Iterable<string>,
outputPath?: string,
): Promise<void> {
let isFirst = true
// Stream to file using fs/promises API
if (outputPath) {
let fileHandle: FileHandle | undefined
try {
fileHandle = await fsp.open(outputPath, 'w')
for (const line of lines) {
if (!isFirst)
await fileHandle.write('\n')
await fileHandle.write(line)
isFirst = false
}
}
finally {
await fileHandle?.close()
}
}
// Stream to stdout
else {
for (const line of lines) {
if (!isFirst)
process.stdout.write('\n')
process.stdout.write(line)
isFirst = false
}
// Add final newline for stdout
process.stdout.write('\n')
}
}