diff --git a/docs/cli/index.md b/docs/cli/index.md index 41407d2..ffb0e94 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -102,6 +102,16 @@ cat data.json | toon - cat data.toon | toon --decode ``` +## Performance + +### Streaming Encoding + +JSON→TOON conversions use the line-by-line encoder (`encodeLines`) internally, so no additional configuration is required. Note that the CLI currently collects the encoded lines and joins them into a single string before writing the output, so the full TOON document is still held in memory. + +::: info Token Statistics +When using the `--stats` flag, the CLI builds the full TOON string once to compute accurate token counts. For maximum memory efficiency on very large files, omit `--stats`. +::: + ## Options | Option | Description | diff --git a/docs/reference/api.md b/docs/reference/api.md index 0141947..f08ba00 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -127,6 +127,68 @@ encode(data, { delimiter: '\t', keyFolding: 'safe' }) ``` ::: +## `encodeLines(value, options?)` + +Converts any JSON-serializable value to TOON format as a sequence of lines, without building the full string in memory. Suitable for streaming large outputs to files, HTTP responses, or process stdout. + +```ts +import { encodeLines } from '@toon-format/toon' + +// Stream to stdout +for (const line of encodeLines(data)) { + console.log(line) +} + +// Write to file line-by-line +const lines = encodeLines(data, { indent: 2, delimiter: '\t' }) +for (const line of lines) { + await writeToStream(`${line}\n`) +} + +// Collect to array +const lineArray = Array.from(encodeLines(data)) +``` + +### Parameters + +| Parameter | Type | Description | +|-----------|------|-------------| +| `value` | `unknown` | Any JSON-serializable value (object, array, primitive, or nested structure) | +| `options` | `EncodeOptions?` | Optional encoding options (same as `encode()`) | + +### Return Value + +Returns an `Iterable<string>` that yields TOON lines one at a time. Each yielded string is a single line without a trailing newline character. 
+ +::: info Relationship to `encode()` +`encode(value, options)` is equivalent to: +```ts +Array.from(encodeLines(value, options)).join('\n') +``` +::: + +### Example + +```ts +import { createWriteStream } from 'node:fs' +import { encodeLines } from '@toon-format/toon' + +const data = { + items: Array.from({ length: 100000 }, (_, i) => ({ + id: i, + name: `Item ${i}`, + value: Math.random() + })) +} + +// Stream large dataset to file +const stream = createWriteStream('output.toon') +for (const line of encodeLines(data, { delimiter: '\t' })) { + stream.write(`${line}\n`) +} +stream.end() +``` + ## `decode(input, options?)` Converts a TOON-formatted string back to JavaScript values. diff --git a/packages/cli/src/conversion.ts b/packages/cli/src/conversion.ts index f830032..d3210d4 100644 --- a/packages/cli/src/conversion.ts +++ b/packages/cli/src/conversion.ts @@ -5,7 +5,7 @@ import * as path from 'node:path' import process from 'node:process' import { consola } from 'consola' import { estimateTokenCount } from 'tokenx' -import { decode, encode } from '../../toon/src' +import { decode, encode, encodeLines } from '../../toon/src' import { formatInputLabel, readInput } from './utils' export async function encodeToToon(config: { @@ -34,7 +34,17 @@ export async function encodeToToon(config: { flattenDepth: config.flattenDepth, } - const toonOutput = encode(data, encodeOptions) + let toonOutput: string + + // When printing stats, we need the full string for token counting + if (config.printStats) { + toonOutput = encode(data, encodeOptions) + } + else { + // Non-stats path: encode line-by-line, then join (the full output is still materialized in memory before writing) + const lines = Array.from(encodeLines(data, encodeOptions)) + toonOutput = lines.join('\n') + } if (config.output) { await fsp.writeFile(config.output, toonOutput, 'utf-8') diff --git a/packages/toon/src/encode/encoders.ts b/packages/toon/src/encode/encoders.ts index b20f221..28ffccc 100644 --- a/packages/toon/src/encode/encoders.ts +++ b/packages/toon/src/encode/encoders.ts 
@@ -1,34 +1,42 @@ import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types' -import { DOT, LIST_ITEM_MARKER } from '../constants' +import { DOT, LIST_ITEM_MARKER, LIST_ITEM_PREFIX } from '../constants' import { tryFoldKeyChain } from './folding' import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isEmptyObject, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize' import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives' -import { LineWriter } from './writer' // #region Encode normalized JsonValue -export function encodeValue(value: JsonValue, options: ResolvedEncodeOptions): string { +export function* encodeJsonValue(value: JsonValue, options: ResolvedEncodeOptions, depth: Depth): Generator { if (isJsonPrimitive(value)) { - return encodePrimitive(value, options.delimiter) - } + // Primitives at root level are returned as a single line + const encodedPrimitive = encodePrimitive(value, options.delimiter) - const writer = new LineWriter(options.indent) + if (encodedPrimitive !== '') + yield encodedPrimitive + + return + } if (isJsonArray(value)) { - encodeArray(undefined, value, writer, 0, options) + yield* encodeArrayLines(undefined, value, depth, options) } else if (isJsonObject(value)) { - encodeObject(value, writer, 0, options) + yield* encodeObjectLines(value, depth, options) } - - return writer.toString() } // #endregion // #region Object encoding -export function encodeObject(value: JsonObject, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, rootLiteralKeys?: Set, pathPrefix?: string, remainingDepth?: number): void { +export function* encodeObjectLines( + value: JsonObject, + depth: Depth, + options: ResolvedEncodeOptions, + rootLiteralKeys?: Set, + pathPrefix?: string, + remainingDepth?: number, +): Generator { const keys = Object.keys(value) // At root level (depth 0), collect all literal dotted keys for collision checking @@ 
-39,11 +47,20 @@ export function encodeObject(value: JsonObject, writer: LineWriter, depth: Depth const effectiveFlattenDepth = remainingDepth ?? options.flattenDepth for (const [key, val] of Object.entries(value)) { - encodeKeyValuePair(key, val, writer, depth, options, keys, rootLiteralKeys, pathPrefix, effectiveFlattenDepth) + yield* encodeKeyValuePairLines(key, val, depth, options, keys, rootLiteralKeys, pathPrefix, effectiveFlattenDepth) } } -export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, siblings?: readonly string[], rootLiteralKeys?: Set, pathPrefix?: string, flattenDepth?: number): void { +export function* encodeKeyValuePairLines( + key: string, + value: JsonValue, + depth: Depth, + options: ResolvedEncodeOptions, + siblings?: readonly string[], + rootLiteralKeys?: Set, + pathPrefix?: string, + flattenDepth?: number, +): Generator { const currentPath = pathPrefix ? `${pathPrefix}${DOT}${key}` : key const effectiveFlattenDepth = flattenDepth ?? 
options.flattenDepth @@ -59,26 +76,26 @@ export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWr if (remainder === undefined) { // The folded chain ended at a leaf (primitive, array, or empty object) if (isJsonPrimitive(leafValue)) { - writer.push(depth, `${encodedFoldedKey}: ${encodePrimitive(leafValue, options.delimiter)}`) + yield indentedLine(depth, `${encodedFoldedKey}: ${encodePrimitive(leafValue, options.delimiter)}`, options.indent) return } else if (isJsonArray(leafValue)) { - encodeArray(foldedKey, leafValue, writer, depth, options) + yield* encodeArrayLines(foldedKey, leafValue, depth, options) return } else if (isJsonObject(leafValue) && isEmptyObject(leafValue)) { - writer.push(depth, `${encodedFoldedKey}:`) + yield indentedLine(depth, `${encodedFoldedKey}:`, options.indent) return } } // Case 2: Partially folded with a tail object if (isJsonObject(remainder)) { - writer.push(depth, `${encodedFoldedKey}:`) + yield indentedLine(depth, `${encodedFoldedKey}:`, options.indent) // Calculate remaining depth budget (subtract segments already folded) const remainingDepth = effectiveFlattenDepth - segmentCount const foldedPath = pathPrefix ? 
`${pathPrefix}${DOT}${foldedKey}` : foldedKey - encodeObject(remainder, writer, depth + 1, options, rootLiteralKeys, foldedPath, remainingDepth) + yield* encodeObjectLines(remainder, depth + 1, options, rootLiteralKeys, foldedPath, remainingDepth) return } } @@ -88,15 +105,15 @@ export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWr const encodedKey = encodeKey(key) if (isJsonPrimitive(value)) { - writer.push(depth, `${encodedKey}: ${encodePrimitive(value, options.delimiter)}`) + yield indentedLine(depth, `${encodedKey}: ${encodePrimitive(value, options.delimiter)}`, options.indent) } else if (isJsonArray(value)) { - encodeArray(key, value, writer, depth, options) + yield* encodeArrayLines(key, value, depth, options) } else if (isJsonObject(value)) { - writer.push(depth, `${encodedKey}:`) + yield indentedLine(depth, `${encodedKey}:`, options.indent) if (!isEmptyObject(value)) { - encodeObject(value, writer, depth + 1, options, rootLiteralKeys, currentPath, effectiveFlattenDepth) + yield* encodeObjectLines(value, depth + 1, options, rootLiteralKeys, currentPath, effectiveFlattenDepth) } } } @@ -105,23 +122,22 @@ export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWr // #region Array encoding -export function encodeArray( +export function* encodeArrayLines( key: string | undefined, value: JsonArray, - writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, -): void { +): Generator { if (value.length === 0) { const header = formatHeader(0, { key, delimiter: options.delimiter }) - writer.push(depth, header) + yield indentedLine(depth, header, options.indent) return } // Primitive array if (isArrayOfPrimitives(value)) { const arrayLine = encodeInlineArrayLine(value, options.delimiter, key) - writer.push(depth, arrayLine) + yield indentedLine(depth, arrayLine, options.indent) return } @@ -129,7 +145,7 @@ export function encodeArray( if (isArrayOfArrays(value)) { const allPrimitiveArrays = value.every(arr => 
isArrayOfPrimitives(arr)) if (allPrimitiveArrays) { - encodeArrayOfArraysAsListItems(key, value, writer, depth, options) + yield* encodeArrayOfArraysAsListItemsLines(key, value, depth, options) return } } @@ -138,36 +154,35 @@ export function encodeArray( if (isArrayOfObjects(value)) { const header = extractTabularHeader(value) if (header) { - encodeArrayOfObjectsAsTabular(key, value, header, writer, depth, options) + yield* encodeArrayOfObjectsAsTabularLines(key, value, header, depth, options) } else { - encodeMixedArrayAsListItems(key, value, writer, depth, options) + yield* encodeMixedArrayAsListItemsLines(key, value, depth, options) } return } // Mixed array: fallback to expanded format - encodeMixedArrayAsListItems(key, value, writer, depth, options) + yield* encodeMixedArrayAsListItemsLines(key, value, depth, options) } // #endregion // #region Array of arrays (expanded format) -export function encodeArrayOfArraysAsListItems( +export function* encodeArrayOfArraysAsListItemsLines( prefix: string | undefined, values: readonly JsonArray[], - writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, -): void { +): Generator { const header = formatHeader(values.length, { key: prefix, delimiter: options.delimiter }) - writer.push(depth, header) + yield indentedLine(depth, header, options.indent) for (const arr of values) { if (isArrayOfPrimitives(arr)) { const arrayLine = encodeInlineArrayLine(arr, options.delimiter) - writer.pushListItem(depth + 1, arrayLine) + yield indentedListItem(depth + 1, arrayLine, options.indent) } } } @@ -186,18 +201,17 @@ export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimite // #region Array of objects (tabular format) -export function encodeArrayOfObjectsAsTabular( +export function* encodeArrayOfObjectsAsTabularLines( prefix: string | undefined, rows: readonly JsonObject[], header: readonly string[], - writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, -): void { +): Generator { const 
formattedHeader = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter }) - writer.push(depth, `${formattedHeader}`) + yield indentedLine(depth, formattedHeader, options.indent) - writeTabularRows(rows, header, writer, depth + 1, options) + yield* writeTabularRowsLines(rows, header, depth + 1, options) } export function extractTabularHeader(rows: readonly JsonObject[]): string[] | undefined { @@ -240,17 +254,16 @@ export function isTabularArray( return true } -function writeTabularRows( +function* writeTabularRowsLines( rows: readonly JsonObject[], header: readonly string[], - writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, -): void { +): Generator { for (const row of rows) { const values = header.map(key => row[key]) const joinedValue = encodeAndJoinPrimitives(values as JsonPrimitive[], options.delimiter) - writer.push(depth, joinedValue) + yield indentedLine(depth, joinedValue, options.indent) } } @@ -258,24 +271,27 @@ function writeTabularRows( // #region Array of objects (expanded format) -export function encodeMixedArrayAsListItems( +export function* encodeMixedArrayAsListItemsLines( prefix: string | undefined, items: readonly JsonValue[], - writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, -): void { +): Generator { const header = formatHeader(items.length, { key: prefix, delimiter: options.delimiter }) - writer.push(depth, header) + yield indentedLine(depth, header, options.indent) for (const item of items) { - encodeListItemValue(item, writer, depth + 1, options) + yield* encodeListItemValueLines(item, depth + 1, options) } } -export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions): void { +export function* encodeObjectAsListItemLines( + obj: JsonObject, + depth: Depth, + options: ResolvedEncodeOptions, +): Generator { if (isEmptyObject(obj)) { - writer.push(depth, LIST_ITEM_MARKER) + yield indentedLine(depth, 
LIST_ITEM_MARKER, options.indent) return } @@ -284,13 +300,13 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept const encodedKey = encodeKey(firstKey) if (isJsonPrimitive(firstValue)) { - writer.pushListItem(depth, `${encodedKey}: ${encodePrimitive(firstValue, options.delimiter)}`) + yield indentedListItem(depth, `${encodedKey}: ${encodePrimitive(firstValue, options.delimiter)}`, options.indent) } else if (isJsonArray(firstValue)) { if (isArrayOfPrimitives(firstValue)) { // Inline format for primitive arrays const arrayPropertyLine = encodeInlineArrayLine(firstValue, options.delimiter, firstKey) - writer.pushListItem(depth, arrayPropertyLine) + yield indentedListItem(depth, arrayPropertyLine, options.indent) } else if (isArrayOfObjects(firstValue)) { // Check if array of objects can use tabular format @@ -298,38 +314,38 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept if (header) { // Tabular format for uniform arrays of objects const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter }) - writer.pushListItem(depth, formattedHeader) - writeTabularRows(firstValue, header, writer, depth + 1, options) + yield indentedListItem(depth, formattedHeader, options.indent) + yield* writeTabularRowsLines(firstValue, header, depth + 1, options) } else { // Fall back to list format for non-uniform arrays of objects - writer.pushListItem(depth, `${encodedKey}[${firstValue.length}]:`) + yield indentedListItem(depth, `${encodedKey}[${firstValue.length}]:`, options.indent) for (const item of firstValue) { - encodeObjectAsListItem(item, writer, depth + 1, options) + yield* encodeObjectAsListItemLines(item, depth + 1, options) } } } else { // Complex arrays on separate lines (array of arrays, etc.) 
- writer.pushListItem(depth, `${encodedKey}[${firstValue.length}]:`) + yield indentedListItem(depth, `${encodedKey}[${firstValue.length}]:`, options.indent) // Encode array contents at depth + 1 for (const item of firstValue) { - encodeListItemValue(item, writer, depth + 1, options) + yield* encodeListItemValueLines(item, depth + 1, options) } } } else if (isJsonObject(firstValue)) { - writer.pushListItem(depth, `${encodedKey}:`) + yield indentedListItem(depth, `${encodedKey}:`, options.indent) if (!isEmptyObject(firstValue)) { - encodeObject(firstValue, writer, depth + 2, options) + yield* encodeObjectLines(firstValue, depth + 2, options) } } // Remaining entries on indented lines for (let i = 1; i < entries.length; i++) { const [key, value] = entries[i]! - encodeKeyValuePair(key, value, writer, depth + 1, options) + yield* encodeKeyValuePairLines(key, value, depth + 1, options) } } @@ -337,22 +353,34 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept // #region List item encoding helpers -function encodeListItemValue( +function* encodeListItemValueLines( value: JsonValue, - writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, -): void { +): Generator { if (isJsonPrimitive(value)) { - writer.pushListItem(depth, encodePrimitive(value, options.delimiter)) + yield indentedListItem(depth, encodePrimitive(value, options.delimiter), options.indent) } else if (isJsonArray(value) && isArrayOfPrimitives(value)) { const arrayLine = encodeInlineArrayLine(value, options.delimiter) - writer.pushListItem(depth, arrayLine) + yield indentedListItem(depth, arrayLine, options.indent) } else if (isJsonObject(value)) { - encodeObjectAsListItem(value, writer, depth, options) + yield* encodeObjectAsListItemLines(value, depth, options) } } // #endregion + +// #region Indentation helpers + +function indentedLine(depth: Depth, content: string, indentSize: number): string { + const indentation = ' '.repeat(indentSize * depth) + return 
indentation + content +} + +function indentedListItem(depth: Depth, content: string, indentSize: number): string { + return indentedLine(depth, LIST_ITEM_PREFIX + content, indentSize) +} + +// #endregion diff --git a/packages/toon/src/encode/writer.ts b/packages/toon/src/encode/writer.ts deleted file mode 100644 index 7e04a1e..0000000 --- a/packages/toon/src/encode/writer.ts +++ /dev/null @@ -1,24 +0,0 @@ -import type { Depth } from '../types' -import { LIST_ITEM_PREFIX } from '../constants' - -export class LineWriter { - private readonly lines: string[] = [] - private readonly indentationString: string - - constructor(indentSize: number) { - this.indentationString = ' '.repeat(indentSize) - } - - push(depth: Depth, content: string): void { - const indent = this.indentationString.repeat(depth) - this.lines.push(indent + content) - } - - pushListItem(depth: Depth, content: string): void { - this.push(depth, `${LIST_ITEM_PREFIX}${content}`) - } - - toString(): string { - return this.lines.join('\n') - } -} diff --git a/packages/toon/src/index.ts b/packages/toon/src/index.ts index e0b870d..b28c379 100644 --- a/packages/toon/src/index.ts +++ b/packages/toon/src/index.ts @@ -3,7 +3,7 @@ import { DEFAULT_DELIMITER } from './constants' import { decodeValueFromLines } from './decode/decoders' import { expandPathsSafe } from './decode/expand' import { LineCursor, toParsedLines } from './decode/scanner' -import { encodeValue } from './encode/encoders' +import { encodeJsonValue } from './encode/encoders' import { normalizeValue } from './encode/normalize' export { DEFAULT_DELIMITER, DELIMITERS } from './constants' @@ -20,6 +20,36 @@ export type { ResolvedEncodeOptions, } from './types' +/** + * Encodes a JavaScript value into TOON format as a sequence of lines. + * + * This function yields TOON lines one at a time without building the full string, + * making it suitable for streaming large outputs to files, HTTP responses, or process stdout. 
+ * + * @param input - Any JavaScript value (objects, arrays, primitives) + * @param options - Optional encoding configuration + * @returns Iterable of TOON lines (without trailing newlines) + * + * @example + * ```ts + * // Stream to stdout + * for (const line of encodeLines({ name: 'Alice', age: 30 })) { + * console.log(line) + * } + * + * // Collect to array + * const lines = Array.from(encodeLines(data)) + * + * // Equivalent to encode() + * const toonString = Array.from(encodeLines(data, options)).join('\n') + * ``` + */ +export function encodeLines(input: unknown, options?: EncodeOptions): Iterable { + const normalizedValue = normalizeValue(input) + const resolvedOptions = resolveOptions(options) + return encodeJsonValue(normalizedValue, resolvedOptions, 0) +} + /** * Encodes a JavaScript value into TOON format string. * @@ -42,9 +72,7 @@ export type { * ``` */ export function encode(input: unknown, options?: EncodeOptions): string { - const normalizedValue = normalizeValue(input) - const resolvedOptions = resolveOptions(options) - return encodeValue(normalizedValue, resolvedOptions) + return Array.from(encodeLines(input, options)).join('\n') } /** diff --git a/packages/toon/test/encodeLines.test.ts b/packages/toon/test/encodeLines.test.ts new file mode 100644 index 0000000..6366d6c --- /dev/null +++ b/packages/toon/test/encodeLines.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, it } from 'vitest' +import { encodeLines } from '../src/index' + +describe('encodeLines', () => { + it('should yield lines without newline characters', () => { + const value = { name: 'Alice', age: 30, city: 'Paris' } + const lines = Array.from(encodeLines(value)) + + for (const line of lines) { + expect(line).not.toContain('\n') + } + }) + + it('should yield zero lines for empty object', () => { + const lines = Array.from(encodeLines({})) + + expect(lines.length).toBe(0) + }) + + it('should be iterable with for-of loop', () => { + const value = { x: 10, y: 20 } + const 
collectedLines: string[] = [] + + for (const line of encodeLines(value)) { + collectedLines.push(line) + } + + expect(collectedLines.length).toBe(2) + expect(collectedLines[0]).toBe('x: 10') + expect(collectedLines[1]).toBe('y: 20') + }) + + it('should not have trailing spaces in lines', () => { + const value = { + user: { + name: 'Alice', + tags: ['a', 'b'], + nested: { + deep: 'value', + }, + }, + } + const lines = Array.from(encodeLines(value)) + + for (const line of lines) { + expect(line).not.toMatch(/\s$/) + } + }) + + it('should yield correct number of lines', () => { + const value = { a: 1, b: 2, c: 3 } + const lines = Array.from(encodeLines(value)) + + expect(lines.length).toBe(3) + }) +})