feat: streaming decode functionality with event-based parsing (closes #131)

This commit is contained in:
Johann Schopplich
2025-11-21 22:29:57 +01:00
parent 9ebad53ea3
commit 6c57a14009
19 changed files with 2220 additions and 431 deletions

View File

@@ -134,8 +134,9 @@ cat million-records.toon | toon --decode > output.json
**Memory efficiency:**
- **Encode (JSON → TOON)**: Streams TOON lines to output without full string in memory
- **Decode (TOON → JSON)**: Streams JSON tokens to output without full string in memory
- **Decode (TOON → JSON)**: Uses the same event-based streaming decoder as the `decodeStream` API in `@toon-format/toon`, streaming JSON tokens to output without full string in memory
- In streaming mode, peak memory usage scales with data depth, not total size
- When `--expand-paths safe` is enabled, decode falls back to non-streaming mode internally to apply deep-merge expansion before writing JSON
> [!NOTE]
> When using `--stats` with encode, the full output string is kept in memory for token counting. Omit `--stats` for maximum memory efficiency with very large datasets.

View File

@@ -1,14 +1,15 @@
import type { FileHandle } from 'node:fs/promises'
import type { DecodeOptions, EncodeOptions } from '../../toon/src'
import type { DecodeOptions, DecodeStreamOptions, EncodeOptions } from '../../toon/src'
import type { InputSource } from './types'
import * as fsp from 'node:fs/promises'
import * as path from 'node:path'
import process from 'node:process'
import { consola } from 'consola'
import { estimateTokenCount } from 'tokenx'
import { decode, encode, encodeLines } from '../../toon/src'
import { decode, decodeStream, encode, encodeLines } from '../../toon/src'
import { jsonStreamFromEvents } from './json-from-events'
import { jsonStringifyLines } from './json-stringify-stream'
import { formatInputLabel, readInput } from './utils'
import { formatInputLabel, readInput, readLinesFromSource } from './utils'
export async function encodeToToon(config: {
input: InputSource
@@ -80,22 +81,43 @@ export async function decodeToJson(config: {
strict: NonNullable<DecodeOptions['strict']>
expandPaths?: NonNullable<DecodeOptions['expandPaths']>
}): Promise<void> {
const toonContent = await readInput(config.input)
// Path expansion requires full value in memory, so use non-streaming path
if (config.expandPaths === 'safe') {
const toonContent = await readInput(config.input)
let data: unknown
try {
const decodeOptions: DecodeOptions = {
indent: config.indent,
strict: config.strict,
expandPaths: config.expandPaths,
let data: unknown
try {
const decodeOptions: DecodeOptions = {
indent: config.indent,
strict: config.strict,
expandPaths: config.expandPaths,
}
data = decode(toonContent, decodeOptions)
}
catch (error) {
throw new Error(`Failed to decode TOON: ${error instanceof Error ? error.message : String(error)}`)
}
data = decode(toonContent, decodeOptions)
}
catch (error) {
throw new Error(`Failed to decode TOON: ${error instanceof Error ? error.message : String(error)}`)
}
await writeStreamingJson(jsonStringifyLines(data, config.indent), config.output)
await writeStreamingJson(jsonStringifyLines(data, config.indent), config.output)
}
else {
try {
const lineSource = readLinesFromSource(config.input)
const decodeStreamOptions: DecodeStreamOptions = {
indent: config.indent,
strict: config.strict,
}
const events = decodeStream(lineSource, decodeStreamOptions)
const jsonChunks = jsonStreamFromEvents(events, config.indent)
await writeStreamingJson(jsonChunks, config.output)
}
catch (error) {
throw new Error(`Failed to decode TOON: ${error instanceof Error ? error.message : String(error)}`)
}
}
if (config.output) {
const relativeInputPath = formatInputLabel(config.input)
@@ -109,7 +131,7 @@ export async function decodeToJson(config: {
* Chunks are written one at a time without building the full string in memory.
*/
async function writeStreamingJson(
chunks: Iterable<string>,
chunks: AsyncIterable<string> | Iterable<string>,
outputPath?: string,
): Promise<void> {
// Stream to file using fs/promises API
@@ -119,7 +141,7 @@ async function writeStreamingJson(
try {
fileHandle = await fsp.open(outputPath, 'w')
for (const chunk of chunks) {
for await (const chunk of chunks) {
await fileHandle.write(chunk)
}
}
@@ -129,7 +151,7 @@ async function writeStreamingJson(
}
// Stream to stdout
else {
for (const chunk of chunks) {
for await (const chunk of chunks) {
process.stdout.write(chunk)
}

View File

@@ -0,0 +1,217 @@
import type { JsonStreamEvent } from '../../toon/src/types'
/**
 * Context for tracking JSON structure state during event streaming.
 *
 * - `needsComma`: the container already holds at least one member/item, so the
 *   next one must be preceded by a comma (and, when pretty printing, the closing
 *   bracket goes on its own line).
 * - `expectValue` (objects only): a key has been emitted and its value is pending.
 */
type JsonContext
  = | { type: 'object', needsComma: boolean, expectValue: boolean }
    | { type: 'array', needsComma: boolean }

/**
 * Converts a stream of `JsonStreamEvent` into formatted JSON string chunks.
 *
 * Similar to `jsonStringifyLines` but driven by events instead of a value tree.
 * Useful for streaming TOON decode directly to JSON output without building
 * the full data structure in memory.
 *
 * For well-formed event streams the concatenated chunks match
 * `JSON.stringify(value, null, indent)` (or `JSON.stringify(value)` when
 * `indent` is not positive). Malformed streams are rejected instead of being
 * rendered as invalid JSON.
 *
 * @param events - Async iterable of JSON stream events
 * @param indent - Number of spaces for indentation (0 = compact, >0 = pretty)
 * @returns Async iterable of JSON string chunks
 * @throws {Error} On mismatched end events, a key outside an object, a value
 * inside an object without a preceding key, a key left without a value, more
 * than one root value, an unknown event type, or unclosed containers at the end.
 *
 * @example
 * ```ts
 * const lines = readLinesFromSource(input)
 * const events = decodeStream(lines)
 * for await (const chunk of jsonStreamFromEvents(events, 2)) {
 *   process.stdout.write(chunk)
 * }
 * ```
 */
export async function* jsonStreamFromEvents(
  events: AsyncIterable<JsonStreamEvent>,
  indent: number = 2,
): AsyncIterable<string> {
  // The nesting depth is always `stack.length`, so no separate counter is kept
  const stack: JsonContext[] = []
  const pretty = indent > 0
  let rootEmitted = false

  // Newline plus indentation for a nesting level; empty in compact mode
  const lineBreak = (level: number): string =>
    pretty ? `\n${' '.repeat(level * indent)}` : ''

  // Validates that a value (primitive or container start) may appear here,
  // updates the parent context, and returns the separator text to emit before it
  const openValue = (label: string): string => {
    const parent = stack[stack.length - 1]

    if (!parent) {
      if (rootEmitted) {
        throw new Error('Multiple root values in event stream')
      }
      rootEmitted = true
      return ''
    }

    if (parent.type === 'object') {
      if (!parent.expectValue) {
        throw new Error(`${label} event in object without preceding key`)
      }
      // The key event already emitted the comma, newline and indentation
      parent.expectValue = false
      return ''
    }

    const separator = (parent.needsComma ? ',' : '') + lineBreak(stack.length)
    parent.needsComma = true
    return separator
  }

  // Pops the innermost container, validates it, and returns its closing text
  const closeContainer = (expected: JsonContext['type'], eventName: string, bracket: string): string => {
    const context = stack.pop()
    if (!context || context.type !== expected) {
      throw new Error(`Mismatched ${eventName} event`)
    }
    if (context.type === 'object' && context.expectValue) {
      throw new Error(`${eventName} event after key without value`)
    }
    // Empty containers stay on one line (`{}` / `[]`), like JSON.stringify
    return (context.needsComma ? lineBreak(stack.length) : '') + bracket
  }

  for await (const event of events) {
    switch (event.type) {
      case 'startObject': {
        yield `${openValue('startObject')}{`
        stack.push({ type: 'object', needsComma: false, expectValue: false })
        break
      }

      case 'endObject': {
        yield closeContainer('object', 'endObject', '}')
        break
      }

      case 'startArray': {
        yield `${openValue('startArray')}[`
        stack.push({ type: 'array', needsComma: false })
        break
      }

      case 'endArray': {
        yield closeContainer('array', 'endArray', ']')
        break
      }

      case 'key': {
        const parent = stack[stack.length - 1]
        if (!parent || parent.type !== 'object') {
          throw new Error('Key event outside of object context')
        }
        if (parent.expectValue) {
          throw new Error('Key event while previous key is still awaiting a value')
        }

        const separator = (parent.needsComma ? ',' : '') + lineBreak(stack.length)
        yield `${separator}${JSON.stringify(event.key)}${pretty ? ': ' : ':'}`

        parent.expectValue = true
        parent.needsComma = true
        break
      }

      case 'primitive': {
        yield openValue('Primitive') + JSON.stringify(event.value)
        break
      }

      default: {
        // Fail loudly rather than silently dropping data for unknown events
        throw new Error(`Unknown event type: ${String((event as { type?: unknown }).type)}`)
      }
    }
  }

  // Ensure stack is empty
  if (stack.length !== 0) {
    throw new Error('Incomplete event stream: unclosed objects or arrays')
  }
}

View File

@@ -1,4 +1,5 @@
import type { InputSource } from './types'
import { createReadStream } from 'node:fs'
import * as fsp from 'node:fs/promises'
import * as path from 'node:path'
import process from 'node:process'
@@ -77,3 +78,32 @@ function readFromStdin(): Promise<string> {
stdin.resume()
})
}
/**
 * Reads a UTF-8 input source (stdin or a file) and yields it line by line.
 *
 * Lines are split on `\n` only and are yielded without the terminator. A final
 * unterminated line is yielded as well; a trailing newline does not produce an
 * extra empty line.
 *
 * @param source - Input source descriptor (`stdin`, or a file with a `path`)
 * @returns Async iterable of lines
 */
export async function* readLinesFromSource(source: InputSource): AsyncIterable<string> {
  let chunks: AsyncIterable<string>
  if (source.type === 'stdin') {
    // stdin has no encoding by default, so request decoded strings explicitly
    process.stdin.setEncoding('utf-8')
    chunks = process.stdin
  }
  else {
    chunks = createReadStream(source.path, { encoding: 'utf-8' })
  }

  // Text received so far that is not yet terminated by a newline
  let pending = ''

  for await (const chunk of chunks) {
    const segments = (pending + chunk).split('\n')
    // The last segment is the (possibly empty) unterminated tail
    pending = segments.pop() ?? ''
    yield* segments
  }

  if (pending !== '') {
    yield pending
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,334 @@
import type { JsonObject, JsonStreamEvent, JsonValue } from '../types'
import { QUOTED_KEY_MARKER } from './expand'
// #region Build context types

/**
 * Stack context for building JSON values from events.
 */
type BuildContext
  = | { type: 'object', obj: JsonObject, currentKey?: string, quotedKeys: Set<string> }
    | { type: 'array', arr: JsonValue[] }

/**
 * Incremental value builder shared by the sync and async entry points.
 */
interface ValueBuilder {
  /** Applies a single event to the value under construction. */
  push: (event: JsonStreamEvent) => void
  /** Validates that the stream was complete and returns the built root value. */
  finish: () => JsonValue
}

// #endregion

// #region Shared builder core

/**
 * Stores `value` under `key` as an own enumerable property of `target`.
 *
 * Plain assignment with the key `__proto__` would invoke the inherited
 * `Object.prototype.__proto__` setter, replacing the object's prototype (or
 * silently dropping a primitive value) instead of creating a member. That key
 * is therefore defined explicitly, matching `JSON.parse` semantics.
 */
function setOwnValue(target: JsonObject, key: string, value: JsonValue): void {
  if (key === '__proto__') {
    Object.defineProperty(target, key, {
      value,
      enumerable: true,
      writable: true,
      configurable: true,
    })
  }
  else {
    target[key] = value
  }
}

/**
 * Creates a builder that folds `JsonStreamEvent`s into a single `JsonValue`.
 */
function createValueBuilder(): ValueBuilder {
  const stack: BuildContext[] = []
  let root: JsonValue | undefined

  // Places a value into the innermost open container (no-op at root level)
  const attachToParent = (value: JsonValue, missingKeyMessage: string): void => {
    const parent = stack[stack.length - 1]
    if (!parent) {
      return
    }

    if (parent.type === 'object') {
      if (parent.currentKey === undefined) {
        throw new Error(missingKeyMessage)
      }
      setOwnValue(parent.obj, parent.currentKey, value)
      parent.currentKey = undefined
    }
    else {
      parent.arr.push(value)
    }
  }

  const push = (event: JsonStreamEvent): void => {
    switch (event.type) {
      case 'startObject': {
        const obj: JsonObject = {}
        attachToParent(obj, 'Object startObject event without preceding key')
        stack.push({ type: 'object', obj, quotedKeys: new Set<string>() })
        break
      }

      case 'endObject': {
        const context = stack.pop()
        if (!context) {
          throw new Error('Unexpected endObject event')
        }
        if (context.type !== 'object') {
          throw new Error('Mismatched endObject event')
        }

        // Attach quoted keys metadata if any keys were quoted
        if (context.quotedKeys.size > 0) {
          Object.defineProperty(context.obj, QUOTED_KEY_MARKER, {
            value: context.quotedKeys,
            enumerable: false,
            writable: false,
            configurable: false,
          })
        }

        // A container becomes the root only once it has been closed
        if (stack.length === 0) {
          root = context.obj
        }
        break
      }

      case 'startArray': {
        const arr: JsonValue[] = []
        attachToParent(arr, 'Array startArray event without preceding key')
        stack.push({ type: 'array', arr })
        break
      }

      case 'endArray': {
        const context = stack.pop()
        if (!context) {
          throw new Error('Unexpected endArray event')
        }
        if (context.type !== 'array') {
          throw new Error('Mismatched endArray event')
        }

        if (stack.length === 0) {
          root = context.arr
        }
        break
      }

      case 'key': {
        const parent = stack[stack.length - 1]
        if (!parent) {
          throw new Error('Key event outside of object context')
        }
        if (parent.type !== 'object') {
          throw new Error('Key event in non-object context')
        }

        parent.currentKey = event.key

        // Track quoted keys for path expansion
        if (event.wasQuoted) {
          parent.quotedKeys.add(event.key)
        }
        break
      }

      case 'primitive': {
        if (stack.length === 0) {
          // Root primitive
          root = event.value
        }
        else {
          attachToParent(event.value, 'Primitive event without preceding key in object')
        }
        break
      }
    }
  }

  const finish = (): JsonValue => {
    if (stack.length !== 0) {
      throw new Error('Incomplete event stream: stack not empty at end')
    }
    if (root === undefined) {
      throw new Error('No root value built from events')
    }
    return root
  }

  return { push, finish }
}

// #endregion

// #region Synchronous AST builder

/**
 * Builds a complete JSON value from a synchronous stream of events.
 *
 * Objects that received at least one quoted key carry a non-enumerable
 * `QUOTED_KEY_MARKER` property holding the set of those keys.
 *
 * @param events - Iterable of JSON stream events
 * @returns The root value described by the events
 * @throws {Error} If the events are malformed (value in an object without a
 * key, key outside an object, mismatched or unexpected end events), the stream
 * ends with open containers, or no root value was produced.
 */
export function buildValueFromEvents(events: Iterable<JsonStreamEvent>): JsonValue {
  const builder = createValueBuilder()
  for (const event of events) {
    builder.push(event)
  }
  return builder.finish()
}

// #endregion

// #region Asynchronous AST builder

/**
 * Builds a complete JSON value from an asynchronous stream of events.
 *
 * Behaves exactly like `buildValueFromEvents`, but consumes an async iterable.
 *
 * @param events - Async iterable of JSON stream events
 * @returns Promise resolving to the root value described by the events
 * @throws {Error} Under the same conditions as `buildValueFromEvents`
 * (surfaced as a rejected promise).
 */
export async function buildValueFromEventsAsync(events: AsyncIterable<JsonStreamEvent>): Promise<JsonValue> {
  const builder = createValueBuilder()
  for await (const event of events) {
    builder.push(event)
  }
  return builder.finish()
}

// #endregion

View File

@@ -12,7 +12,7 @@ import { isIdentifierSegment } from '../shared/validation'
export const QUOTED_KEY_MARKER: unique symbol = Symbol('quotedKey')
/**
* Type for objects that may have quoted key metadata attached.
* Objects that may have quoted key metadata attached.
*/
export interface ObjectWithQuotedKeys extends JsonObject {
[QUOTED_KEY_MARKER]?: Set<string>
@@ -226,6 +226,10 @@ function mergeObjects(
// #endregion
// #region Type Guards
/**
 * Reports whether both values satisfy `isJsonObject`.
 *
 * The type predicate narrows only `a`.
 */
function canMerge(a: JsonValue, b: JsonValue): a is JsonObject {
  if (!isJsonObject(a)) {
    return false
  }
  return isJsonObject(b)
}
// #endregion

View File

@@ -305,11 +305,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
// #region Array content detection helpers
export function isArrayHeaderAfterHyphen(content: string): boolean {
export function isArrayHeaderContent(content: string): boolean {
return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1
}
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
export function isKeyValueContent(content: string): boolean {
return findUnquotedChar(content, COLON) !== -1
}

View File

@@ -1,109 +1,109 @@
import type { BlankLineInfo, Depth, ParsedLine } from '../types'
import { SPACE, TAB } from '../constants'
export interface ScanResult {
lines: ParsedLine[]
// #region Scan state
export interface StreamingScanState {
lineNumber: number
blankLines: BlankLineInfo[]
}
export class LineCursor {
private lines: ParsedLine[]
private index: number
private blankLines: BlankLineInfo[]
constructor(lines: ParsedLine[], blankLines: BlankLineInfo[] = []) {
this.lines = lines
this.index = 0
this.blankLines = blankLines
}
getBlankLines(): BlankLineInfo[] {
return this.blankLines
}
peek(): ParsedLine | undefined {
return this.lines[this.index]
}
next(): ParsedLine | undefined {
return this.lines[this.index++]
}
current(): ParsedLine | undefined {
return this.index > 0 ? this.lines[this.index - 1] : undefined
}
advance(): void {
this.index++
}
atEnd(): boolean {
return this.index >= this.lines.length
}
get length(): number {
return this.lines.length
}
peekAtDepth(targetDepth: Depth): ParsedLine | undefined {
const line = this.peek()
return line?.depth === targetDepth ? line : undefined
/**
 * Creates a fresh scan state: no lines counted yet and no blank lines recorded.
 */
export function createScanState(): StreamingScanState {
  const initialState: StreamingScanState = { lineNumber: 0, blankLines: [] }
  return initialState
}
export function toParsedLines(source: string, indentSize: number, strict: boolean): ScanResult {
if (!source.trim()) {
return { lines: [], blankLines: [] }
// #endregion
// #region Line parsing
export function parseLineIncremental(
raw: string,
state: StreamingScanState,
indentSize: number,
strict: boolean,
): ParsedLine | undefined {
state.lineNumber++
const lineNumber = state.lineNumber
// Count leading spaces
let indent = 0
while (indent < raw.length && raw[indent] === SPACE) {
indent++
}
const lines = source.split('\n')
const parsed: ParsedLine[] = []
const blankLines: BlankLineInfo[] = []
for (let i = 0; i < lines.length; i++) {
const raw = lines[i]!
const lineNumber = i + 1
let indent = 0
while (indent < raw.length && raw[indent] === SPACE) {
indent++
}
const content = raw.slice(indent)
// Track blank lines
if (!content.trim()) {
const depth = computeDepthFromIndent(indent, indentSize)
blankLines.push({ lineNumber, indent, depth })
continue
}
const content = raw.slice(indent)
// Track blank lines
if (!content.trim()) {
const depth = computeDepthFromIndent(indent, indentSize)
// Strict mode validation
if (strict) {
// Find the full leading whitespace region (spaces and tabs)
let whitespaceEndIndex = 0
while (whitespaceEndIndex < raw.length && (raw[whitespaceEndIndex] === SPACE || raw[whitespaceEndIndex] === TAB)) {
whitespaceEndIndex++
}
// Check for tabs in leading whitespace (before actual content)
if (raw.slice(0, whitespaceEndIndex).includes(TAB)) {
throw new SyntaxError(`Line ${lineNumber}: Tabs are not allowed in indentation in strict mode`)
}
// Check for exact multiples of indentSize
if (indent > 0 && indent % indentSize !== 0) {
throw new SyntaxError(`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
}
}
parsed.push({ raw, indent, content, depth, lineNumber })
state.blankLines.push({ lineNumber, indent, depth })
return undefined
}
return { lines: parsed, blankLines }
const depth = computeDepthFromIndent(indent, indentSize)
// Strict mode validation
if (strict) {
// Find the full leading whitespace region (spaces and tabs)
let whitespaceEndIndex = 0
while (
whitespaceEndIndex < raw.length
&& (raw[whitespaceEndIndex] === SPACE || raw[whitespaceEndIndex] === TAB)
) {
whitespaceEndIndex++
}
// Check for tabs in leading whitespace (before actual content)
if (raw.slice(0, whitespaceEndIndex).includes(TAB)) {
throw new SyntaxError(`Line ${lineNumber}: Tabs are not allowed in indentation in strict mode`)
}
// Check for exact multiples of indentSize
if (indent > 0 && indent % indentSize !== 0) {
throw new SyntaxError(
`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`,
)
}
}
return { raw, indent, content, depth, lineNumber }
}
/**
 * Lazily parses raw lines, yielding only those for which
 * `parseLineIncremental` returns a parsed line.
 *
 * @param source - Iterable of raw lines
 * @param indentSize - Indentation size passed through to the line parser
 * @param strict - Strict-mode flag passed through to the line parser
 * @param state - Scan state passed to (and updated by) the line parser
 */
export function* parseLinesSync(
  source: Iterable<string>,
  indentSize: number,
  strict: boolean,
  state: StreamingScanState,
): Generator<ParsedLine> {
  for (const rawLine of source) {
    const result = parseLineIncremental(rawLine, state, indentSize, strict)
    if (result === undefined) {
      continue
    }
    yield result
  }
}
/**
 * Async counterpart of `parseLinesSync`: lazily parses raw lines from an async
 * source, yielding only those for which `parseLineIncremental` returns a
 * parsed line.
 *
 * @param source - Async iterable of raw lines
 * @param indentSize - Indentation size passed through to the line parser
 * @param strict - Strict-mode flag passed through to the line parser
 * @param state - Scan state passed to (and updated by) the line parser
 */
export async function* parseLinesAsync(
  source: AsyncIterable<string>,
  indentSize: number,
  strict: boolean,
  state: StreamingScanState,
): AsyncGenerator<ParsedLine> {
  for await (const rawLine of source) {
    const result = parseLineIncremental(rawLine, state, indentSize, strict)
    if (result === undefined) {
      continue
    }
    yield result
  }
}
/**
 * Converts a leading-space count into a nesting depth by dividing by the
 * indentation size and rounding down (partial indentation levels are dropped).
 */
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
  const levels = indentSpaces / indentSize
  return Math.floor(levels)
}
// #endregion

View File

@@ -1,7 +1,8 @@
import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner'
import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ParsedLine } from '../types'
import { COLON, LIST_ITEM_PREFIX } from '../constants'
// #region Count and structure validation
/**
* Asserts that the actual count matches the expected count in strict mode.
*/
@@ -9,7 +10,7 @@ export function assertExpectedCount(
actual: number,
expected: number,
itemType: string,
options: ResolvedDecodeOptions,
options: { strict: boolean },
): void {
if (options.strict && actual !== expected) {
throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`)
@@ -20,11 +21,10 @@ export function assertExpectedCount(
* Validates that there are no extra list items beyond the expected count.
*/
export function validateNoExtraListItems(
cursor: LineCursor,
nextLine: ParsedLine | undefined,
itemDepth: Depth,
expectedCount: number,
): void {
const nextLine = cursor.peek()
if (nextLine?.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
throw new RangeError(`Expected ${expectedCount} list array items, but found more`)
}
@@ -34,11 +34,10 @@ export function validateNoExtraListItems(
* Validates that there are no extra tabular rows beyond the expected count.
*/
export function validateNoExtraTabularRows(
cursor: LineCursor,
nextLine: ParsedLine | undefined,
rowDepth: Depth,
header: ArrayHeaderInfo,
): void {
const nextLine = cursor.peek()
if (
nextLine?.depth === rowDepth
&& !nextLine.content.startsWith(LIST_ITEM_PREFIX)
@@ -62,8 +61,6 @@ export function validateNoBlankLinesInRange(
return
// Find blank lines within the range
// Note: We don't filter by depth because ANY blank line between array items is an error,
// regardless of its indentation level
const firstBlank = blankLines.find(
blank => blank.lineNumber > startLine && blank.lineNumber < endLine,
)
@@ -75,6 +72,10 @@ export function validateNoBlankLinesInRange(
}
}
// #endregion
// #region Row classification helpers
/**
* Checks if a line is a data row (vs a key-value pair) in a tabular array.
*/
@@ -95,3 +96,5 @@ function isDataRow(content: string, delimiter: Delimiter): boolean {
// Colon before delimiter or no delimiter = key-value pair
return false
}
// #endregion

View File

@@ -1,55 +1,27 @@
import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
import type { DecodeOptions, DecodeStreamOptions, EncodeOptions, JsonStreamEvent, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
import { DEFAULT_DELIMITER } from './constants'
import { decodeValueFromLines } from './decode/decoders'
import { decodeStream as decodeStreamCore, decodeStreamSync as decodeStreamSyncCore } from './decode/decoders'
import { buildValueFromEvents } from './decode/event-builder'
import { expandPathsSafe } from './decode/expand'
import { LineCursor, toParsedLines } from './decode/scanner'
import { encodeJsonValue } from './encode/encoders'
import { normalizeValue } from './encode/normalize'
export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
export type {
DecodeOptions,
DecodeStreamOptions,
Delimiter,
DelimiterKey,
EncodeOptions,
JsonArray,
JsonObject,
JsonPrimitive,
JsonStreamEvent,
JsonValue,
ResolvedDecodeOptions,
ResolvedEncodeOptions,
} from './types'
/**
* Encodes a JavaScript value into TOON format as a sequence of lines.
*
* This function yields TOON lines one at a time without building the full string,
* making it suitable for streaming large outputs to files, HTTP responses, or process stdout.
*
* @param input - Any JavaScript value (objects, arrays, primitives)
* @param options - Optional encoding configuration
* @returns Iterable of TOON lines (without trailing newlines)
*
* @example
* ```ts
* // Stream to stdout
* for (const line of encodeLines({ name: 'Alice', age: 30 })) {
* console.log(line)
* }
*
* // Collect to array
* const lines = Array.from(encodeLines(data))
*
* // Equivalent to encode()
* const toonString = Array.from(encodeLines(data, options)).join('\n')
* ```
*/
export function encodeLines(input: unknown, options?: EncodeOptions): Iterable<string> {
const normalizedValue = normalizeValue(input)
const resolvedOptions = resolveOptions(options)
return encodeJsonValue(normalizedValue, resolvedOptions, 0)
}
/**
* Encodes a JavaScript value into TOON format string.
*
@@ -94,15 +66,69 @@ export function encode(input: unknown, options?: EncodeOptions): string {
* ```
*/
export function decode(input: string, options?: DecodeOptions): JsonValue {
const resolvedOptions = resolveDecodeOptions(options)
const scanResult = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
const lines = input.split('\n')
return decodeFromLines(lines, options)
}
if (scanResult.lines.length === 0) {
return {}
/**
* Encodes a JavaScript value into TOON format as a sequence of lines.
*
* This function yields TOON lines one at a time without building the full string,
* making it suitable for streaming large outputs to files, HTTP responses, or process stdout.
*
* @param input - Any JavaScript value (objects, arrays, primitives)
* @param options - Optional encoding configuration
* @returns Iterable of TOON lines (without trailing newlines)
*
* @example
* ```ts
* // Stream to stdout
* for (const line of encodeLines({ name: 'Alice', age: 30 })) {
* console.log(line)
* }
*
* // Collect to array
* const lines = Array.from(encodeLines(data))
*
* // Equivalent to encode()
* const toonString = Array.from(encodeLines(data, options)).join('\n')
* ```
*/
export function encodeLines(input: unknown, options?: EncodeOptions): Iterable<string> {
  // Normalize first, then resolve options, then hand both to the encoder at depth 0
  return encodeJsonValue(normalizeValue(input), resolveOptions(options), 0)
}
/**
* Decodes TOON format from pre-split lines into a JavaScript value.
*
* This is a convenience wrapper around the streaming decoder that builds
* the full value in memory. Useful when you already have lines as an array
* or iterable and want the standard decode behavior with path expansion support.
*
* @param lines - Iterable of TOON lines (without newlines)
* @param options - Optional decoding configuration (supports expandPaths)
* @returns Parsed JavaScript value (object, array, or primitive)
*
* @example
* ```ts
* const lines = ['name: Alice', 'age: 30']
* decodeFromLines(lines)
* // { name: 'Alice', age: 30 }
* ```
*/
export function decodeFromLines(lines: Iterable<string>, options?: DecodeOptions): JsonValue {
const resolvedOptions = resolveDecodeOptions(options)
// Use streaming decoder without expandPaths
const streamOptions: DecodeStreamOptions = {
indent: resolvedOptions.indent,
strict: resolvedOptions.strict,
}
const cursor = new LineCursor(scanResult.lines, scanResult.blankLines)
const decodedValue = decodeValueFromLines(cursor, resolvedOptions)
const events = decodeStreamSyncCore(lines, streamOptions)
const decodedValue = buildValueFromEvents(events)
// Apply path expansion if enabled
if (resolvedOptions.expandPaths === 'safe') {
@@ -112,6 +138,72 @@ export function decode(input: string, options?: DecodeOptions): JsonValue {
return decodedValue
}
/**
* Synchronously decodes TOON lines into a stream of JSON events.
*
* This function yields structured events (startObject, endObject, startArray, endArray,
* key, primitive) that represent the JSON data model without building the full value tree.
* Useful for streaming processing, custom transformations, or memory-efficient parsing.
*
* @remarks
* Path expansion (`expandPaths: 'safe'`) is not supported in streaming mode.
*
* @param lines - Iterable of TOON lines (without newlines)
* @param options - Optional decoding configuration (expandPaths not supported)
* @returns Iterable of JSON stream events
*
* @example
* ```ts
* const lines = ['name: Alice', 'age: 30']
* for (const event of decodeStreamSync(lines)) {
* console.log(event)
* // { type: 'startObject' }
* // { type: 'key', key: 'name' }
* // { type: 'primitive', value: 'Alice' }
* // ...
* }
* ```
*/
export function decodeStreamSync(lines: Iterable<string>, options?: DecodeStreamOptions): Iterable<JsonStreamEvent> {
  // Public facade: delegates unchanged to the core synchronous streaming decoder
  const events = decodeStreamSyncCore(lines, options)
  return events
}
/**
* Asynchronously decodes TOON lines into a stream of JSON events.
*
* This function yields structured events (startObject, endObject, startArray, endArray,
* key, primitive) that represent the JSON data model without building the full value tree.
* Supports both sync and async iterables for maximum flexibility with file streams,
* network responses, or other async sources.
*
* @remarks
* Path expansion (`expandPaths: 'safe'`) is not supported in streaming mode.
*
* @param source - Async or sync iterable of TOON lines (without newlines)
* @param options - Optional decoding configuration (expandPaths not supported)
* @returns Async iterable of JSON stream events
*
* @example
* ```ts
* const fileStream = createReadStream('data.toon', 'utf-8')
* const lines = splitLines(fileStream) // Async iterable of lines
*
* for await (const event of decodeStream(lines)) {
* console.log(event)
* // { type: 'startObject' }
* // { type: 'key', key: 'name' }
* // { type: 'primitive', value: 'Alice' }
* // ...
* }
* ```
*/
export function decodeStream(
  source: AsyncIterable<string> | Iterable<string>,
  options?: DecodeStreamOptions,
): AsyncIterable<JsonStreamEvent> {
  // Public facade: delegates unchanged to the core asynchronous streaming decoder
  const events = decodeStreamCore(source, options)
  return events
}
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
return {
indent: options?.indent ?? 2,

View File

@@ -69,6 +69,32 @@ export interface DecodeOptions {
/**
* `DecodeOptions` with every property made required and read-only.
* NOTE(review): presumably the shape used once defaults have been filled in — confirm against the option resolver.
*/
export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>
/**
* Options for streaming decode operations.
*
* Inherits every `DecodeOptions` property except `expandPaths`, which is
* re-declared below with type `never`.
*
* @remarks
* Path expansion is not supported in streaming mode.
*/
export interface DecodeStreamOptions extends Omit<DecodeOptions, 'expandPaths'> {
/**
* Path expansion is not supported in streaming decode.
* This option is explicitly omitted: it is typed as `never`, so the type
* checker rejects any value other than `undefined`.
*/
expandPaths?: never
}
// #endregion
// #region Streaming decoder types
/**
* Event type produced by the streaming decode API, discriminated by `type`.
*
* - `startObject` / `endObject`: bracket the contents of an object.
* - `startArray` / `endArray`: bracket the contents of an array; `startArray` also carries a `length`.
* - `key`: names the object entry whose value follows.
* - `primitive`: carries a single `JsonPrimitive` value.
*
* NOTE(review): `length` appears to be the count declared in the TOON array header,
* and `wasQuoted` presumably records whether the key was quoted in the source — confirm both against the decoder.
*/
export type JsonStreamEvent
= | { type: 'startObject' }
| { type: 'endObject' }
| { type: 'startArray', length: number }
| { type: 'endArray' }
| { type: 'key', key: string, wasQuoted?: boolean }
| { type: 'primitive', value: JsonPrimitive }
// #endregion
// #region Decoder parsing types

View File

@@ -0,0 +1,343 @@
import { describe, expect, it } from 'vitest'
import { buildValueFromEvents } from '../src/decode/event-builder'
import { decode, decodeFromLines, decodeStreamSync } from '../src/index'
describe('streaming decode', () => {
describe('decodeStreamSync', () => {
  /**
   * Splits a TOON document on newlines and drains the synchronous stream decoder.
   *
   * Nested lines in the fixtures below are indented by two spaces per level: a single
   * leading space is not a multiple of the indent size, so those lines would not be
   * read as children of the preceding header.
   */
  const collectEvents = (toon: string, options?: Parameters<typeof decodeStreamSync>[1]) =>
    Array.from(decodeStreamSync(toon.split('\n'), options))

  it('should decode simple object', () => {
    expect(collectEvents('name: Alice\nage: 30')).toEqual([
      { type: 'startObject' },
      { type: 'key', key: 'name' },
      { type: 'primitive', value: 'Alice' },
      { type: 'key', key: 'age' },
      { type: 'primitive', value: 30 },
      { type: 'endObject' },
    ])
  })

  it('should decode nested object', () => {
    expect(collectEvents('user:\n  name: Alice\n  age: 30')).toEqual([
      { type: 'startObject' },
      { type: 'key', key: 'user' },
      { type: 'startObject' },
      { type: 'key', key: 'name' },
      { type: 'primitive', value: 'Alice' },
      { type: 'key', key: 'age' },
      { type: 'primitive', value: 30 },
      { type: 'endObject' },
      { type: 'endObject' },
    ])
  })

  it('should decode inline primitive array', () => {
    expect(collectEvents('scores[3]: 95, 87, 92')).toEqual([
      { type: 'startObject' },
      { type: 'key', key: 'scores' },
      { type: 'startArray', length: 3 },
      { type: 'primitive', value: 95 },
      { type: 'primitive', value: 87 },
      { type: 'primitive', value: 92 },
      { type: 'endArray' },
      { type: 'endObject' },
    ])
  })

  it('should decode list array', () => {
    expect(collectEvents('items[2]:\n  - Apple\n  - Banana')).toEqual([
      { type: 'startObject' },
      { type: 'key', key: 'items' },
      { type: 'startArray', length: 2 },
      { type: 'primitive', value: 'Apple' },
      { type: 'primitive', value: 'Banana' },
      { type: 'endArray' },
      { type: 'endObject' },
    ])
  })

  it('should decode tabular array', () => {
    expect(collectEvents('users[2]{name,age}:\n  Alice, 30\n  Bob, 25')).toEqual([
      { type: 'startObject' },
      { type: 'key', key: 'users' },
      { type: 'startArray', length: 2 },
      { type: 'startObject' },
      { type: 'key', key: 'name' },
      { type: 'primitive', value: 'Alice' },
      { type: 'key', key: 'age' },
      { type: 'primitive', value: 30 },
      { type: 'endObject' },
      { type: 'startObject' },
      { type: 'key', key: 'name' },
      { type: 'primitive', value: 'Bob' },
      { type: 'key', key: 'age' },
      { type: 'primitive', value: 25 },
      { type: 'endObject' },
      { type: 'endArray' },
      { type: 'endObject' },
    ])
  })

  it('should decode root primitive', () => {
    expect(collectEvents('Hello World')).toEqual([
      { type: 'primitive', value: 'Hello World' },
    ])
  })

  it('should decode root array', () => {
    expect(collectEvents('[2]:\n  - Apple\n  - Banana')).toEqual([
      { type: 'startArray', length: 2 },
      { type: 'primitive', value: 'Apple' },
      { type: 'primitive', value: 'Banana' },
      { type: 'endArray' },
    ])
  })

  it('should decode empty input as empty object', () => {
    // Deliberately bypasses the helper: ''.split('\n') would yield one empty line, not zero lines.
    const lines: string[] = []
    const events = Array.from(decodeStreamSync(lines))

    expect(events).toEqual([
      { type: 'startObject' },
      { type: 'endObject' },
    ])
  })

  it('should throw on expandPaths option', () => {
    const lines = 'name: Alice'.split('\n')
    // @ts-expect-error -- `expandPaths` is typed `never` for streaming decode; this exercises the runtime guard
    const run = () => Array.from(decodeStreamSync(lines, { expandPaths: 'safe' }))

    expect(run).toThrow('expandPaths is not supported in streaming decode')
  })

  it('should enforce strict mode validation', () => {
    // Header declares two items but only one follows.
    expect(() => collectEvents('items[2]:\n  - Apple', { strict: true }))
      .toThrow()
  })

  it('should allow count mismatch in non-strict mode', () => {
    // Should not throw in non-strict mode
    const events = collectEvents('items[2]:\n  - Apple', { strict: false })

    expect(events).toBeDefined()
    expect(events[0]).toEqual({ type: 'startObject' })
  })
})
describe('buildValueFromEvents', () => {
  // Small event factories: they keep the `type` tags narrowed to literals
  // without repeating `as const` on every fixture entry.
  const startObject = () => ({ type: 'startObject' as const })
  const endObject = () => ({ type: 'endObject' as const })
  const startArray = (length: number) => ({ type: 'startArray' as const, length })
  const endArray = () => ({ type: 'endArray' as const })
  const key = (name: string) => ({ type: 'key' as const, key: name })
  const primitive = <T extends string | number>(value: T) => ({ type: 'primitive' as const, value })

  it('should build object from events', () => {
    const built = buildValueFromEvents([
      startObject(),
      key('name'),
      primitive('Alice'),
      key('age'),
      primitive(30),
      endObject(),
    ])

    expect(built).toEqual({ name: 'Alice', age: 30 })
  })

  it('should build nested object from events', () => {
    const built = buildValueFromEvents([
      startObject(),
      key('user'),
      startObject(),
      key('name'),
      primitive('Alice'),
      endObject(),
      endObject(),
    ])

    expect(built).toEqual({ user: { name: 'Alice' } })
  })

  it('should build array from events', () => {
    const built = buildValueFromEvents([
      startArray(3),
      primitive(1),
      primitive(2),
      primitive(3),
      endArray(),
    ])

    expect(built).toEqual([1, 2, 3])
  })

  it('should build primitive from events', () => {
    const built = buildValueFromEvents([primitive('Hello')])

    expect(built).toEqual('Hello')
  })

  it('should throw on incomplete event stream', () => {
    // The stream stops after the key: both the value and the closing endObject are missing.
    const truncated = [startObject(), key('name')]

    expect(() => buildValueFromEvents(truncated))
      .toThrow('Incomplete event stream')
  })
})
describe('decodeFromLines', () => {
  // Fixtures indent nested TOON lines by two spaces per level. Fields that continue a
  // list-item object sit one level below the hyphen line (four spaces here).

  it('should produce same result as decode', () => {
    const input = 'name: Alice\nage: 30\nscores[3]: 95, 87, 92'

    const fromLines = decodeFromLines(input.split('\n'))
    const fromString = decode(input)

    expect(fromLines).toEqual(fromString)
  })

  it('should support expandPaths option', () => {
    const lines = 'user.name: Alice\nuser.age: 30'.split('\n')

    const result = decodeFromLines(lines, { expandPaths: 'safe' })

    expect(result).toEqual({
      user: {
        name: 'Alice',
        age: 30,
      },
    })
  })

  it('should handle complex nested structures', () => {
    const input = [
      'users[2]:',
      '  - name: Alice',
      '    scores[3]: 95, 87, 92',
      '  - name: Bob',
      '    scores[3]: 88, 91, 85',
    ].join('\n')

    const fromLines = decodeFromLines(input.split('\n'))
    const fromString = decode(input)

    expect(fromLines).toEqual(fromString)
    expect(fromLines).toEqual({
      users: [
        { name: 'Alice', scores: [95, 87, 92] },
        { name: 'Bob', scores: [88, 91, 85] },
      ],
    })
  })

  it('should handle tabular arrays', () => {
    const input = [
      'users[3]{name,age,city}:',
      '  Alice, 30, NYC',
      '  Bob, 25, LA',
      '  Charlie, 35, SF',
    ].join('\n')

    const fromLines = decodeFromLines(input.split('\n'))
    const fromString = decode(input)

    expect(fromLines).toEqual(fromString)
    expect(fromLines).toEqual({
      users: [
        { name: 'Alice', age: 30, city: 'NYC' },
        { name: 'Bob', age: 25, city: 'LA' },
        { name: 'Charlie', age: 35, city: 'SF' },
      ],
    })
  })
})
describe('streaming equivalence', () => {
  // Test that streaming produces same results as non-streaming for various inputs.
  // Nested lines are indented by two spaces per level so the fixtures really contain
  // nested structures; a single leading space is not a valid nesting step.
  const testCases = [
    {
      name: 'simple object',
      input: 'name: Alice\nage: 30',
    },
    {
      name: 'nested objects',
      input: 'user:\n  profile:\n    name: Alice\n    age: 30',
    },
    {
      name: 'mixed structures',
      input: 'name: Alice\nscores[3]: 95, 87, 92\naddress:\n  city: NYC\n  zip: 10001',
    },
    {
      name: 'list array with objects',
      input: 'users[2]:\n  - name: Alice\n    age: 30\n  - name: Bob\n    age: 25',
    },
    {
      name: 'root primitive number',
      input: '42',
    },
    {
      name: 'root primitive string',
      input: 'Hello World',
    },
    {
      name: 'root primitive boolean',
      input: 'true',
    },
    {
      name: 'root primitive null',
      input: 'null',
    },
  ]

  for (const testCase of testCases) {
    it(`should match decode() for: ${testCase.name}`, () => {
      const lines = testCase.input.split('\n')

      const streamResult = decodeFromLines(lines)
      const regularResult = decode(testCase.input)

      expect(streamResult).toEqual(regularResult)
    })
  }
})
})