feat: streaming decode functionality with event-based parsing (closes #131)

This commit is contained in:
Johann Schopplich
2025-11-21 22:29:57 +01:00
parent 9ebad53ea3
commit 6c57a14009
19 changed files with 2220 additions and 431 deletions

View File

@@ -777,6 +777,46 @@ for (const line of encodeLines(largeData)) {
} }
``` ```
**Streaming decode:**
```ts
import { decodeFromLines, decodeStreamSync } from '@toon-format/toon'
// 1. Lines → value (build full JSON value)
const value = decodeFromLines([
'users[2]{id,name}:',
' 1,Alice',
' 2,Bob',
])
// { users: [{ id: 1, name: 'Alice' }, { id: 2, name: 'Bob' }] }
// 2. Lines → events (for custom streaming consumers)
const lines = [
'users[2]{id,name}:',
' 1,Alice',
' 2,Bob',
]
for (const event of decodeStreamSync(lines)) {
// { type: 'startObject' }, { type: 'key', key: 'users' }, ...
}
```
**Async streaming decode:**
```ts
// 3. Async streaming from files or network
import { createReadStream } from 'node:fs'
import { createInterface } from 'node:readline'
import { decodeStream } from '@toon-format/toon'
const fileStream = createReadStream('data.toon', 'utf-8')
const rl = createInterface({ input: fileStream })
for await (const event of decodeStream(rl)) {
// Process events as they arrive
}
```
## Playgrounds
Experiment with TOON format interactively using these community-built tools for token comparison, format conversion, and validation:

View File

@@ -108,19 +108,25 @@ cat data.toon | toon --decode
Both encoding and decoding operations use streaming output, writing incrementally without building the full output string in memory. This makes the CLI efficient for large datasets without requiring additional configuration. Both encoding and decoding operations use streaming output, writing incrementally without building the full output string in memory. This makes the CLI efficient for large datasets without requiring additional configuration.
**JSON → TOON (Encode)** **JSON → TOON (Encode)**:
- Streams TOON lines to output
- No full TOON string in memory
**TOON → JSON (Decode)** - Streams TOON lines to output.
- Streams JSON tokens to output - No full TOON string in memory.
- No full JSON string in memory
**TOON → JSON (Decode)**:
- Uses the same event-based streaming decoder as the `decodeStream` API in `@toon-format/toon`.
- Streams JSON tokens to output.
- No full JSON string in memory.
- When `--expand-paths safe` is enabled, falls back to non-streaming decode internally to apply deep-merge expansion before writing JSON.
Process large files with minimal memory usage:
```bash ```bash
# Encode large JSON file with minimal memory usage # Encode large JSON file
toon huge-dataset.json -o output.toon toon huge-dataset.json -o output.toon
# Decode large TOON file with minimal memory usage # Decode large TOON file
toon huge-dataset.toon -o output.json toon huge-dataset.toon -o output.json
# Process millions of records efficiently via stdin # Process millions of records efficiently via stdin

View File

@@ -237,3 +237,5 @@ Round-tripping is lossless: `decode(encode(x))` always equals `x` (after normali
## Where to Go Next ## Where to Go Next
Now that you've seen your first TOON document, read the [Format Overview](/guide/format-overview) for complete syntax details (objects, arrays, quoting rules, key folding), then explore [Using TOON with LLMs](/guide/llm-prompts) to see how to use it effectively in prompts. For implementation details, check the [API reference](/reference/api) (TypeScript) or the [specification](/reference/spec) (language-agnostic normative rules). Now that you've seen your first TOON document, read the [Format Overview](/guide/format-overview) for complete syntax details (objects, arrays, quoting rules, key folding), then explore [Using TOON with LLMs](/guide/llm-prompts) to see how to use it effectively in prompts. For implementation details, check the [API reference](/reference/api) (TypeScript) or the [specification](/reference/spec) (language-agnostic normative rules).
For large datasets or streaming use-cases, see `encodeLines`, `decodeFromLines`, and `decodeStream` in the [API reference](/reference/api).

View File

@@ -118,6 +118,31 @@ toon large-dataset.json --output output.toon
This streaming approach prevents out-of-memory errors when preparing large context windows for LLMs. For complete details on `encodeLines()`, see the [API reference](/reference/api#encodelines). This streaming approach prevents out-of-memory errors when preparing large context windows for LLMs. For complete details on `encodeLines()`, see the [API reference](/reference/api#encodelines).
**Consuming streaming LLM outputs:** If your LLM client exposes streaming text and you buffer by lines, you can decode TOON incrementally:
```ts
import { decodeFromLines } from '@toon-format/toon'
// Buffer streaming response into lines
const lines: string[] = []
let buffer = ''
for await (const chunk of modelStream) {
buffer += chunk
let index: number
while ((index = buffer.indexOf('\n')) !== -1) {
lines.push(buffer.slice(0, index))
buffer = buffer.slice(index + 1)
}
}
// Decode buffered lines
const data = decodeFromLines(lines)
```
For streaming decode APIs, see [`decodeFromLines()`](/reference/api#decodeFromLines-lines-options) and [`decodeStream()`](/reference/api#decodeStream-source-options).
## Tips and Pitfalls ## Tips and Pitfalls
**Show, don't describe.** Don't explain TOON syntax in detail just show an example. Models learn the pattern from context. A simple code block with 2-5 rows is more effective than paragraphs of explanation. **Show, don't describe.** Don't explain TOON syntax in detail just show an example. Models learn the pattern from context. A simple code block with 2-5 rows is more effective than paragraphs of explanation.

View File

@@ -300,6 +300,227 @@ decode(toon, { expandPaths: 'safe', strict: false })
``` ```
::: :::
## `decodeFromLines(lines, options?)`
Decodes TOON format from pre-split lines into a JavaScript value. This is a streaming-friendly wrapper around the event-based decoder: it consumes lines incrementally, but still builds the complete decoded value in memory.
Useful when you already have lines as an array or iterable (e.g., from file streams, readline interfaces, or network responses) and want the standard decode behavior with path expansion support.
### Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `lines` | `Iterable<string>` | Iterable of TOON lines (without trailing newlines) |
| `options` | `DecodeOptions?` | Optional decoding configuration (see below) |
### Options
| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `indent` | `number` | `2` | Expected number of spaces per indentation level |
| `strict` | `boolean` | `true` | Enable strict validation (array counts, indentation, delimiter consistency) |
| `expandPaths` | `'off'` \| `'safe'` | `'off'` | Enable path expansion to reconstruct dotted keys into nested objects |
### Return Value
Returns a `JsonValue` (the parsed JavaScript value: object, array, or primitive).
### Example
**Basic usage with arrays:**
```ts
import { decodeFromLines } from '@toon-format/toon'
const lines = ['name: Alice', 'age: 30']
const value = decodeFromLines(lines)
// { name: 'Alice', age: 30 }
```
**Streaming from Node.js readline:**
```ts
import { createReadStream } from 'node:fs'
import { createInterface } from 'node:readline'
import { decodeFromLines } from '@toon-format/toon'
const rl = createInterface({
input: createReadStream('data.toon'),
crlfDelay: Infinity,
})
const value = decodeFromLines(rl)
console.log(value)
```
**With path expansion:**
```ts
const lines = ['user.name: Alice', 'user.age: 30']
const value = decodeFromLines(lines, { expandPaths: 'safe' })
// { user: { name: 'Alice', age: 30 } }
```
## `decodeStreamSync(lines, options?)`
Synchronously decodes TOON lines into a stream of JSON events. This function yields structured events that represent the JSON data model without building the full value tree.
Useful for streaming processing, custom transformations, or memory-efficient parsing of large datasets where you don't need the full value in memory.
::: info Event Streaming
This is a low-level API that returns individual parse events. For most use cases, [`decodeFromLines()`](#decodeFromLines-lines-options) or [`decode()`](#decode-input-options) are more convenient.
Path expansion (`expandPaths: 'safe'`) is **not supported** in streaming mode since it requires the full value tree.
:::
### Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `lines` | `Iterable<string>` | Iterable of TOON lines (without trailing newlines) |
| `options` | `DecodeStreamOptions?` | Optional streaming decoding configuration (see below) |
### Options
| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `indent` | `number` | `2` | Expected number of spaces per indentation level |
| `strict` | `boolean` | `true` | Enable strict validation (array counts, indentation, delimiter consistency) |
### Return Value
Returns an `Iterable<JsonStreamEvent>` that yields structured events.
### Event Types
Events represent the structure of the JSON data model:
```ts
type JsonStreamEvent
= | { type: 'startObject' }
| { type: 'endObject' }
| { type: 'startArray' }
| { type: 'endArray' }
| { type: 'key', key: string }
| { type: 'primitive', value: JsonPrimitive }
type JsonPrimitive = string | number | boolean | null
```
### Example
**Basic event streaming:**
```ts
import { decodeStreamSync } from '@toon-format/toon'
const lines = ['name: Alice', 'age: 30']
for (const event of decodeStreamSync(lines)) {
console.log(event)
}
// Output:
// { type: 'startObject' }
// { type: 'key', key: 'name' }
// { type: 'primitive', value: 'Alice' }
// { type: 'key', key: 'age' }
// { type: 'primitive', value: 30 }
// { type: 'endObject' }
```
**Custom processing:**
```ts
import { decodeStreamSync } from '@toon-format/toon'
const lines = ['users[2]{id,name}:', ' 1,Alice', ' 2,Bob']
let userCount = 0
for (const event of decodeStreamSync(lines)) {
if (event.type === 'endObject' && userCount < 2) {
userCount++
console.log(`Processed user ${userCount}`)
}
}
```
## `decodeStream(source, options?)`
Asynchronously decodes TOON lines into a stream of JSON events. This is the async version of [`decodeStreamSync()`](#decodeStreamSync-lines-options), supporting both synchronous and asynchronous iterables.
Useful for processing file streams, network responses, or other async sources where you want to handle data incrementally as it arrives.
### Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `source` | `AsyncIterable<string>` \| `Iterable<string>` | Async or sync iterable of TOON lines (without trailing newlines) |
| `options` | `DecodeStreamOptions?` | Optional streaming decoding configuration (see below) |
### Options
| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `indent` | `number` | `2` | Expected number of spaces per indentation level |
| `strict` | `boolean` | `true` | Enable strict validation (array counts, indentation, delimiter consistency) |
### Return Value
Returns an `AsyncIterable<JsonStreamEvent>` that yields structured events asynchronously.
### Example
**Streaming from file:**
```ts
import { createReadStream } from 'node:fs'
import { createInterface } from 'node:readline'
import { decodeStream } from '@toon-format/toon'
const fileStream = createReadStream('data.toon', 'utf-8')
const rl = createInterface({ input: fileStream, crlfDelay: Infinity })
for await (const event of decodeStream(rl)) {
console.log(event)
// Process events as they arrive
}
```
**Processing events incrementally:**
```ts
import { decodeStream } from '@toon-format/toon'
const lines = getAsyncLineSource() // AsyncIterable<string>
let expectingId = false
for await (const event of decodeStream(lines, { strict: true })) {
  if (event.type === 'key' && event.key === 'id') {
    // The next primitive event carries the id value
    expectingId = true
  }
  else if (expectingId && event.type === 'primitive') {
    console.log('Found ID:', event.value)
    expectingId = false
  }
}
```
**Auto-detection of sync/async sources:**
```ts
// Works with sync iterables
const syncLines = ['name: Alice', 'age: 30']
for await (const event of decodeStream(syncLines)) {
console.log(event)
}
// Works with async iterables
const asyncLines = readLinesFromNetwork()
for await (const event of decodeStream(asyncLines)) {
console.log(event)
}
```
## Round-Trip Compatibility ## Round-Trip Compatibility
TOON provides lossless round-trips after normalization: TOON provides lossless round-trips after normalization:

View File

@@ -1,10 +1,15 @@
// @ts-check // @ts-check
import antfu from '@antfu/eslint-config' import antfu from '@antfu/eslint-config'
export default antfu().append({ export default antfu({
rules: {
'no-cond-assign': 'off',
},
}).append({
files: ['README.md', 'SPEC.md', '**/docs/**/*'], files: ['README.md', 'SPEC.md', '**/docs/**/*'],
rules: { rules: {
'yaml/quotes': 'off', 'import/no-duplicates': 'off',
'style/no-tabs': 'off', 'style/no-tabs': 'off',
'yaml/quotes': 'off',
}, },
}) })

View File

@@ -134,8 +134,9 @@ cat million-records.toon | toon --decode > output.json
**Memory efficiency:** **Memory efficiency:**
- **Encode (JSON → TOON)**: Streams TOON lines to output without full string in memory - **Encode (JSON → TOON)**: Streams TOON lines to output without full string in memory
- **Decode (TOON → JSON)**: Streams JSON tokens to output without full string in memory - **Decode (TOON → JSON)**: Uses the same event-based streaming decoder as the `decodeStream` API in `@toon-format/toon`, streaming JSON tokens to output without full string in memory
- Peak memory usage scales with data depth, not total size - Peak memory usage scales with data depth, not total size
- When `--expand-paths safe` is enabled, decode falls back to non-streaming mode internally to apply deep-merge expansion before writing JSON
> [!NOTE] > [!NOTE]
> When using `--stats` with encode, the full output string is kept in memory for token counting. Omit `--stats` for maximum memory efficiency with very large datasets. > When using `--stats` with encode, the full output string is kept in memory for token counting. Omit `--stats` for maximum memory efficiency with very large datasets.

View File

@@ -1,14 +1,15 @@
import type { FileHandle } from 'node:fs/promises' import type { FileHandle } from 'node:fs/promises'
import type { DecodeOptions, EncodeOptions } from '../../toon/src' import type { DecodeOptions, DecodeStreamOptions, EncodeOptions } from '../../toon/src'
import type { InputSource } from './types' import type { InputSource } from './types'
import * as fsp from 'node:fs/promises' import * as fsp from 'node:fs/promises'
import * as path from 'node:path' import * as path from 'node:path'
import process from 'node:process' import process from 'node:process'
import { consola } from 'consola' import { consola } from 'consola'
import { estimateTokenCount } from 'tokenx' import { estimateTokenCount } from 'tokenx'
import { decode, encode, encodeLines } from '../../toon/src' import { decode, decodeStream, encode, encodeLines } from '../../toon/src'
import { jsonStreamFromEvents } from './json-from-events'
import { jsonStringifyLines } from './json-stringify-stream' import { jsonStringifyLines } from './json-stringify-stream'
import { formatInputLabel, readInput } from './utils' import { formatInputLabel, readInput, readLinesFromSource } from './utils'
export async function encodeToToon(config: { export async function encodeToToon(config: {
input: InputSource input: InputSource
@@ -80,22 +81,43 @@ export async function decodeToJson(config: {
strict: NonNullable<DecodeOptions['strict']> strict: NonNullable<DecodeOptions['strict']>
expandPaths?: NonNullable<DecodeOptions['expandPaths']> expandPaths?: NonNullable<DecodeOptions['expandPaths']>
}): Promise<void> { }): Promise<void> {
const toonContent = await readInput(config.input) // Path expansion requires full value in memory, so use non-streaming path
if (config.expandPaths === 'safe') {
const toonContent = await readInput(config.input)
let data: unknown let data: unknown
try { try {
const decodeOptions: DecodeOptions = { const decodeOptions: DecodeOptions = {
indent: config.indent, indent: config.indent,
strict: config.strict, strict: config.strict,
expandPaths: config.expandPaths, expandPaths: config.expandPaths,
}
data = decode(toonContent, decodeOptions)
}
catch (error) {
throw new Error(`Failed to decode TOON: ${error instanceof Error ? error.message : String(error)}`)
} }
data = decode(toonContent, decodeOptions)
}
catch (error) {
throw new Error(`Failed to decode TOON: ${error instanceof Error ? error.message : String(error)}`)
}
await writeStreamingJson(jsonStringifyLines(data, config.indent), config.output) await writeStreamingJson(jsonStringifyLines(data, config.indent), config.output)
}
else {
try {
const lineSource = readLinesFromSource(config.input)
const decodeStreamOptions: DecodeStreamOptions = {
indent: config.indent,
strict: config.strict,
}
const events = decodeStream(lineSource, decodeStreamOptions)
const jsonChunks = jsonStreamFromEvents(events, config.indent)
await writeStreamingJson(jsonChunks, config.output)
}
catch (error) {
throw new Error(`Failed to decode TOON: ${error instanceof Error ? error.message : String(error)}`)
}
}
if (config.output) { if (config.output) {
const relativeInputPath = formatInputLabel(config.input) const relativeInputPath = formatInputLabel(config.input)
@@ -109,7 +131,7 @@ export async function decodeToJson(config: {
* Chunks are written one at a time without building the full string in memory. * Chunks are written one at a time without building the full string in memory.
*/ */
async function writeStreamingJson( async function writeStreamingJson(
chunks: Iterable<string>, chunks: AsyncIterable<string> | Iterable<string>,
outputPath?: string, outputPath?: string,
): Promise<void> { ): Promise<void> {
// Stream to file using fs/promises API // Stream to file using fs/promises API
@@ -119,7 +141,7 @@ async function writeStreamingJson(
try { try {
fileHandle = await fsp.open(outputPath, 'w') fileHandle = await fsp.open(outputPath, 'w')
for (const chunk of chunks) { for await (const chunk of chunks) {
await fileHandle.write(chunk) await fileHandle.write(chunk)
} }
} }
@@ -129,7 +151,7 @@ async function writeStreamingJson(
} }
// Stream to stdout // Stream to stdout
else { else {
for (const chunk of chunks) { for await (const chunk of chunks) {
process.stdout.write(chunk) process.stdout.write(chunk)
} }

View File

@@ -0,0 +1,217 @@
import type { JsonStreamEvent } from '../../toon/src/types'
/**
 * Context for tracking JSON structure state during event streaming.
 *
 * - `object`: `needsComma` is true once at least one member has been emitted;
 *   `expectValue` is true between a `key` event and its value.
 * - `array`: `needsComma` is true once at least one element has been emitted.
 */
type JsonContext
  = | { type: 'object', needsComma: boolean, expectValue: boolean }
    | { type: 'array', needsComma: boolean }

/**
 * Converts a stream of `JsonStreamEvent` into formatted JSON string chunks.
 *
 * Similar to `jsonStringifyLines` but driven by events instead of a value tree.
 * Useful for streaming TOON decode directly to JSON output without building
 * the full data structure in memory.
 *
 * Malformed event streams are rejected: a container or primitive inside an
 * object must be preceded by a `key` event, begin/end events must match, and
 * every opened container must be closed.
 *
 * @param events - Async iterable of JSON stream events
 * @param indent - Number of spaces for indentation (0 = compact, >0 = pretty)
 * @returns Async iterable of JSON string chunks
 *
 * @example
 * ```ts
 * const lines = readLinesFromSource(input)
 * const events = decodeStream(lines)
 * for await (const chunk of jsonStreamFromEvents(events, 2)) {
 *   process.stdout.write(chunk)
 * }
 * ```
 */
export async function* jsonStreamFromEvents(
  events: AsyncIterable<JsonStreamEvent>,
  indent: number = 2,
): AsyncIterable<string> {
  const stack: JsonContext[] = []
  let depth = 0

  // After a container closes, the enclosing context has received a complete
  // value: the pending key (if any) is satisfied and a comma is needed before
  // the next sibling.
  const markValueComplete = (): void => {
    const top = stack.length > 0 ? stack[stack.length - 1] : undefined
    if (!top)
      return
    if (top.type === 'object') {
      top.expectValue = false
      top.needsComma = true
    }
    else {
      top.needsComma = true
    }
  }

  for await (const event of events) {
    const parent = stack.length > 0 ? stack[stack.length - 1] : undefined

    switch (event.type) {
      case 'startObject': {
        if (parent) {
          if (parent.type === 'array') {
            // Separate from the previous array element, then break the line
            if (parent.needsComma)
              yield ','
            if (indent > 0) {
              yield '\n'
              yield ' '.repeat(depth * indent)
            }
          }
          else if (!parent.expectValue) {
            // Mirror the primitive case: an object used as a member value
            // must be preceded by a key event
            throw new Error('startObject event in object without preceding key')
          }
        }
        yield '{'
        stack.push({ type: 'object', needsComma: false, expectValue: false })
        depth++
        break
      }
      case 'endObject': {
        const context = stack.pop()
        if (!context || context.type !== 'object') {
          throw new Error('Mismatched endObject event')
        }
        depth--
        // Closing brace goes on its own line unless the object was empty
        if (indent > 0 && context.needsComma) {
          yield '\n'
          yield ' '.repeat(depth * indent)
        }
        yield '}'
        markValueComplete()
        break
      }
      case 'startArray': {
        if (parent) {
          if (parent.type === 'array') {
            if (parent.needsComma)
              yield ','
            if (indent > 0) {
              yield '\n'
              yield ' '.repeat(depth * indent)
            }
          }
          else if (!parent.expectValue) {
            // An array used as a member value must be preceded by a key event
            throw new Error('startArray event in object without preceding key')
          }
        }
        yield '['
        stack.push({ type: 'array', needsComma: false })
        depth++
        break
      }
      case 'endArray': {
        const context = stack.pop()
        if (!context || context.type !== 'array') {
          throw new Error('Mismatched endArray event')
        }
        depth--
        // Closing bracket goes on its own line unless the array was empty
        if (indent > 0 && context.needsComma) {
          yield '\n'
          yield ' '.repeat(depth * indent)
        }
        yield ']'
        markValueComplete()
        break
      }
      case 'key': {
        if (!parent || parent.type !== 'object') {
          throw new Error('Key event outside of object context')
        }
        // Comma before this member if a previous member was emitted
        if (parent.needsComma)
          yield ','
        if (indent > 0) {
          yield '\n'
          yield ' '.repeat(depth * indent)
        }
        yield JSON.stringify(event.key)
        yield indent > 0 ? ': ' : ':'
        parent.expectValue = true
        parent.needsComma = true
        break
      }
      case 'primitive': {
        if (parent) {
          if (parent.type === 'array') {
            if (parent.needsComma)
              yield ','
            if (indent > 0) {
              yield '\n'
              yield ' '.repeat(depth * indent)
            }
            parent.needsComma = true
          }
          else {
            if (!parent.expectValue)
              throw new Error('Primitive event in object without preceding key')
            // Value satisfies the pending key; needsComma was set by the key
            parent.expectValue = false
          }
        }
        yield JSON.stringify(event.value)
        break
      }
    }
  }

  // Every opened container must have been closed
  if (stack.length !== 0) {
    throw new Error('Incomplete event stream: unclosed objects or arrays')
  }
}

View File

@@ -1,4 +1,5 @@
import type { InputSource } from './types' import type { InputSource } from './types'
import { createReadStream } from 'node:fs'
import * as fsp from 'node:fs/promises' import * as fsp from 'node:fs/promises'
import * as path from 'node:path' import * as path from 'node:path'
import process from 'node:process' import process from 'node:process'
@@ -77,3 +78,32 @@ function readFromStdin(): Promise<string> {
stdin.resume() stdin.resume()
}) })
} }
/**
 * Reads the input source (stdin or a file) as an async stream of lines.
 *
 * Lines are split on `\n`; a trailing `\r` (CRLF line endings) is stripped
 * so Windows-style files decode the same as Unix-style ones. A final line
 * without a trailing newline is still emitted.
 *
 * @param source - CLI input source (stdin or a file path)
 * @returns Async iterable of lines without line terminators
 */
export async function* readLinesFromSource(source: InputSource): AsyncIterable<string> {
  const stream = source.type === 'stdin'
    ? process.stdin
    : createReadStream(source.path, { encoding: 'utf-8' })

  // Explicitly set encoding for stdin (file streams get it via options above)
  if (source.type === 'stdin') {
    stream.setEncoding('utf-8')
  }

  // Drop a trailing carriage return left over from CRLF terminators
  const stripCarriageReturn = (line: string): string =>
    line.endsWith('\r') ? line.slice(0, -1) : line

  let buffer = ''
  for await (const chunk of stream) {
    buffer += chunk
    let index: number
    while ((index = buffer.indexOf('\n')) !== -1) {
      yield stripCarriageReturn(buffer.slice(0, index))
      buffer = buffer.slice(index + 1)
    }
  }

  // Flush the last line if the input does not end with a newline
  if (buffer.length > 0) {
    yield stripCarriageReturn(buffer)
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,334 @@
import type { JsonObject, JsonStreamEvent, JsonValue } from '../types'
import { QUOTED_KEY_MARKER } from './expand'
// #region Build context types

/**
 * Stack context for building JSON values from events.
 *
 * An `object` frame remembers the key currently awaiting its value
 * (`currentKey`) and which keys were quoted in the source (`quotedKeys`,
 * consumed later by path expansion).
 */
type BuildContext
  = | { type: 'object', obj: JsonObject, currentKey?: string, quotedKeys: Set<string> }
    | { type: 'array', arr: JsonValue[] }

// #endregion

// #region Synchronous AST builder

/**
 * Replays a synchronous stream of `JsonStreamEvent`s into a fully built
 * `JsonValue` tree.
 *
 * @param events - Sync iterable of decoder events
 * @returns The root value described by the event stream
 * @throws If the events are malformed: mismatched begin/end events, a value
 *   inside an object without a preceding key, or an incomplete/empty stream
 */
export function buildValueFromEvents(events: Iterable<JsonStreamEvent>): JsonValue {
  const stack: BuildContext[] = []
  let root: JsonValue | undefined

  // Link a value into the enclosing container, or record it as the root
  // when no container is open.
  const attach = (value: JsonValue, missingKeyMessage: string): void => {
    const top = stack.length > 0 ? stack[stack.length - 1]! : undefined
    if (top === undefined) {
      root = value
    }
    else if (top.type === 'array') {
      top.arr.push(value)
    }
    else {
      if (top.currentKey === undefined) {
        throw new Error(missingKeyMessage)
      }
      top.obj[top.currentKey] = value
      top.currentKey = undefined
    }
  }

  for (const event of events) {
    switch (event.type) {
      case 'startObject': {
        const obj: JsonObject = {}
        attach(obj, 'Object startObject event without preceding key')
        stack.push({ type: 'object', obj, quotedKeys: new Set<string>() })
        break
      }
      case 'startArray': {
        const arr: JsonValue[] = []
        attach(arr, 'Array startArray event without preceding key')
        stack.push({ type: 'array', arr })
        break
      }
      case 'endObject': {
        if (stack.length === 0) {
          throw new Error('Unexpected endObject event')
        }
        const frame = stack.pop()!
        if (frame.type !== 'object') {
          throw new Error('Mismatched endObject event')
        }
        // Attach quoted-key metadata as a hidden property for path expansion
        if (frame.quotedKeys.size > 0) {
          Object.defineProperty(frame.obj, QUOTED_KEY_MARKER, {
            value: frame.quotedKeys,
            enumerable: false,
            writable: false,
            configurable: false,
          })
        }
        if (stack.length === 0) {
          root = frame.obj
        }
        break
      }
      case 'endArray': {
        if (stack.length === 0) {
          throw new Error('Unexpected endArray event')
        }
        const frame = stack.pop()!
        if (frame.type !== 'array') {
          throw new Error('Mismatched endArray event')
        }
        if (stack.length === 0) {
          root = frame.arr
        }
        break
      }
      case 'key': {
        if (stack.length === 0) {
          throw new Error('Key event outside of object context')
        }
        const frame = stack[stack.length - 1]!
        if (frame.type !== 'object') {
          throw new Error('Key event in non-object context')
        }
        frame.currentKey = event.key
        // Track quoted keys for path expansion
        if (event.wasQuoted) {
          frame.quotedKeys.add(event.key)
        }
        break
      }
      case 'primitive': {
        attach(event.value, 'Primitive event without preceding key in object')
        break
      }
    }
  }

  if (stack.length !== 0) {
    throw new Error('Incomplete event stream: stack not empty at end')
  }
  if (root === undefined) {
    throw new Error('No root value built from events')
  }
  return root
}
// #endregion
// #region Asynchronous AST builder
/**
 * Replays an asynchronous stream of `JsonStreamEvent`s into a fully built
 * `JsonValue` tree. Async counterpart of `buildValueFromEvents`.
 *
 * @param events - Async iterable of decoder events
 * @returns Promise resolving to the root value described by the event stream
 * @throws If the events are malformed: mismatched begin/end events, a value
 *   inside an object without a preceding key, or an incomplete/empty stream
 */
export async function buildValueFromEventsAsync(events: AsyncIterable<JsonStreamEvent>): Promise<JsonValue> {
  const stack: BuildContext[] = []
  let root: JsonValue | undefined

  // Link a value into the enclosing container, or record it as the root
  // when no container is open.
  const attach = (value: JsonValue, missingKeyMessage: string): void => {
    const top = stack.length > 0 ? stack[stack.length - 1]! : undefined
    if (top === undefined) {
      root = value
    }
    else if (top.type === 'array') {
      top.arr.push(value)
    }
    else {
      if (top.currentKey === undefined) {
        throw new Error(missingKeyMessage)
      }
      top.obj[top.currentKey] = value
      top.currentKey = undefined
    }
  }

  for await (const event of events) {
    switch (event.type) {
      case 'startObject': {
        const obj: JsonObject = {}
        attach(obj, 'Object startObject event without preceding key')
        stack.push({ type: 'object', obj, quotedKeys: new Set<string>() })
        break
      }
      case 'startArray': {
        const arr: JsonValue[] = []
        attach(arr, 'Array startArray event without preceding key')
        stack.push({ type: 'array', arr })
        break
      }
      case 'endObject': {
        if (stack.length === 0) {
          throw new Error('Unexpected endObject event')
        }
        const frame = stack.pop()!
        if (frame.type !== 'object') {
          throw new Error('Mismatched endObject event')
        }
        // Attach quoted-key metadata as a hidden property for path expansion
        if (frame.quotedKeys.size > 0) {
          Object.defineProperty(frame.obj, QUOTED_KEY_MARKER, {
            value: frame.quotedKeys,
            enumerable: false,
            writable: false,
            configurable: false,
          })
        }
        if (stack.length === 0) {
          root = frame.obj
        }
        break
      }
      case 'endArray': {
        if (stack.length === 0) {
          throw new Error('Unexpected endArray event')
        }
        const frame = stack.pop()!
        if (frame.type !== 'array') {
          throw new Error('Mismatched endArray event')
        }
        if (stack.length === 0) {
          root = frame.arr
        }
        break
      }
      case 'key': {
        if (stack.length === 0) {
          throw new Error('Key event outside of object context')
        }
        const frame = stack[stack.length - 1]!
        if (frame.type !== 'object') {
          throw new Error('Key event in non-object context')
        }
        frame.currentKey = event.key
        // Track quoted keys for path expansion
        if (event.wasQuoted) {
          frame.quotedKeys.add(event.key)
        }
        break
      }
      case 'primitive': {
        attach(event.value, 'Primitive event without preceding key in object')
        break
      }
    }
  }

  if (stack.length !== 0) {
    throw new Error('Incomplete event stream: stack not empty at end')
  }
  if (root === undefined) {
    throw new Error('No root value built from events')
  }
  return root
}
// #endregion

View File

@@ -12,7 +12,7 @@ import { isIdentifierSegment } from '../shared/validation'
export const QUOTED_KEY_MARKER: unique symbol = Symbol('quotedKey') export const QUOTED_KEY_MARKER: unique symbol = Symbol('quotedKey')
/** /**
* Type for objects that may have quoted key metadata attached. * Objects that may have quoted key metadata attached.
*/ */
export interface ObjectWithQuotedKeys extends JsonObject { export interface ObjectWithQuotedKeys extends JsonObject {
[QUOTED_KEY_MARKER]?: Set<string> [QUOTED_KEY_MARKER]?: Set<string>
@@ -226,6 +226,10 @@ function mergeObjects(
// #endregion // #endregion
// #region Type Guards
function canMerge(a: JsonValue, b: JsonValue): a is JsonObject { function canMerge(a: JsonValue, b: JsonValue): a is JsonObject {
return isJsonObject(a) && isJsonObject(b) return isJsonObject(a) && isJsonObject(b)
} }
// #endregion

View File

@@ -305,11 +305,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
// #region Array content detection helpers // #region Array content detection helpers
export function isArrayHeaderAfterHyphen(content: string): boolean { export function isArrayHeaderContent(content: string): boolean {
return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1 return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1
} }
export function isObjectFirstFieldAfterHyphen(content: string): boolean { export function isKeyValueContent(content: string): boolean {
return findUnquotedChar(content, COLON) !== -1 return findUnquotedChar(content, COLON) !== -1
} }

View File

@@ -1,109 +1,109 @@
import type { BlankLineInfo, Depth, ParsedLine } from '../types' import type { BlankLineInfo, Depth, ParsedLine } from '../types'
import { SPACE, TAB } from '../constants' import { SPACE, TAB } from '../constants'
export interface ScanResult { // #region Scan state
lines: ParsedLine[]
export interface StreamingScanState {
lineNumber: number
blankLines: BlankLineInfo[] blankLines: BlankLineInfo[]
} }
export class LineCursor { export function createScanState(): StreamingScanState {
private lines: ParsedLine[] return {
private index: number lineNumber: 0,
private blankLines: BlankLineInfo[] blankLines: [],
constructor(lines: ParsedLine[], blankLines: BlankLineInfo[] = []) {
this.lines = lines
this.index = 0
this.blankLines = blankLines
}
getBlankLines(): BlankLineInfo[] {
return this.blankLines
}
peek(): ParsedLine | undefined {
return this.lines[this.index]
}
next(): ParsedLine | undefined {
return this.lines[this.index++]
}
current(): ParsedLine | undefined {
return this.index > 0 ? this.lines[this.index - 1] : undefined
}
advance(): void {
this.index++
}
atEnd(): boolean {
return this.index >= this.lines.length
}
get length(): number {
return this.lines.length
}
peekAtDepth(targetDepth: Depth): ParsedLine | undefined {
const line = this.peek()
return line?.depth === targetDepth ? line : undefined
} }
} }
export function toParsedLines(source: string, indentSize: number, strict: boolean): ScanResult { // #endregion
if (!source.trim()) {
return { lines: [], blankLines: [] } // #region Line parsing
export function parseLineIncremental(
raw: string,
state: StreamingScanState,
indentSize: number,
strict: boolean,
): ParsedLine | undefined {
state.lineNumber++
const lineNumber = state.lineNumber
// Count leading spaces
let indent = 0
while (indent < raw.length && raw[indent] === SPACE) {
indent++
} }
const lines = source.split('\n') const content = raw.slice(indent)
const parsed: ParsedLine[] = []
const blankLines: BlankLineInfo[] = []
for (let i = 0; i < lines.length; i++) {
const raw = lines[i]!
const lineNumber = i + 1
let indent = 0
while (indent < raw.length && raw[indent] === SPACE) {
indent++
}
const content = raw.slice(indent)
// Track blank lines
if (!content.trim()) {
const depth = computeDepthFromIndent(indent, indentSize)
blankLines.push({ lineNumber, indent, depth })
continue
}
// Track blank lines
if (!content.trim()) {
const depth = computeDepthFromIndent(indent, indentSize) const depth = computeDepthFromIndent(indent, indentSize)
state.blankLines.push({ lineNumber, indent, depth })
// Strict mode validation return undefined
if (strict) {
// Find the full leading whitespace region (spaces and tabs)
let whitespaceEndIndex = 0
while (whitespaceEndIndex < raw.length && (raw[whitespaceEndIndex] === SPACE || raw[whitespaceEndIndex] === TAB)) {
whitespaceEndIndex++
}
// Check for tabs in leading whitespace (before actual content)
if (raw.slice(0, whitespaceEndIndex).includes(TAB)) {
throw new SyntaxError(`Line ${lineNumber}: Tabs are not allowed in indentation in strict mode`)
}
// Check for exact multiples of indentSize
if (indent > 0 && indent % indentSize !== 0) {
throw new SyntaxError(`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
}
}
parsed.push({ raw, indent, content, depth, lineNumber })
} }
return { lines: parsed, blankLines } const depth = computeDepthFromIndent(indent, indentSize)
// Strict mode validation
if (strict) {
// Find the full leading whitespace region (spaces and tabs)
let whitespaceEndIndex = 0
while (
whitespaceEndIndex < raw.length
&& (raw[whitespaceEndIndex] === SPACE || raw[whitespaceEndIndex] === TAB)
) {
whitespaceEndIndex++
}
// Check for tabs in leading whitespace (before actual content)
if (raw.slice(0, whitespaceEndIndex).includes(TAB)) {
throw new SyntaxError(`Line ${lineNumber}: Tabs are not allowed in indentation in strict mode`)
}
// Check for exact multiples of indentSize
if (indent > 0 && indent % indentSize !== 0) {
throw new SyntaxError(
`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`,
)
}
}
return { raw, indent, content, depth, lineNumber }
}
export function* parseLinesSync(
source: Iterable<string>,
indentSize: number,
strict: boolean,
state: StreamingScanState,
): Generator<ParsedLine> {
for (const raw of source) {
const parsedLine = parseLineIncremental(raw, state, indentSize, strict)
if (parsedLine !== undefined) {
yield parsedLine
}
}
}
export async function* parseLinesAsync(
source: AsyncIterable<string>,
indentSize: number,
strict: boolean,
state: StreamingScanState,
): AsyncGenerator<ParsedLine> {
for await (const raw of source) {
const parsedLine = parseLineIncremental(raw, state, indentSize, strict)
if (parsedLine !== undefined) {
yield parsedLine
}
}
} }
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth { function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
return Math.floor(indentSpaces / indentSize) return Math.floor(indentSpaces / indentSize)
} }
// #endregion

View File

@@ -1,7 +1,8 @@
import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types' import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ParsedLine } from '../types'
import type { LineCursor } from './scanner'
import { COLON, LIST_ITEM_PREFIX } from '../constants' import { COLON, LIST_ITEM_PREFIX } from '../constants'
// #region Count and structure validation
/** /**
* Asserts that the actual count matches the expected count in strict mode. * Asserts that the actual count matches the expected count in strict mode.
*/ */
@@ -9,7 +10,7 @@ export function assertExpectedCount(
actual: number, actual: number,
expected: number, expected: number,
itemType: string, itemType: string,
options: ResolvedDecodeOptions, options: { strict: boolean },
): void { ): void {
if (options.strict && actual !== expected) { if (options.strict && actual !== expected) {
throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`) throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`)
@@ -20,11 +21,10 @@ export function assertExpectedCount(
* Validates that there are no extra list items beyond the expected count. * Validates that there are no extra list items beyond the expected count.
*/ */
export function validateNoExtraListItems( export function validateNoExtraListItems(
cursor: LineCursor, nextLine: ParsedLine | undefined,
itemDepth: Depth, itemDepth: Depth,
expectedCount: number, expectedCount: number,
): void { ): void {
const nextLine = cursor.peek()
if (nextLine?.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) { if (nextLine?.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
throw new RangeError(`Expected ${expectedCount} list array items, but found more`) throw new RangeError(`Expected ${expectedCount} list array items, but found more`)
} }
@@ -34,11 +34,10 @@ export function validateNoExtraListItems(
* Validates that there are no extra tabular rows beyond the expected count. * Validates that there are no extra tabular rows beyond the expected count.
*/ */
export function validateNoExtraTabularRows( export function validateNoExtraTabularRows(
cursor: LineCursor, nextLine: ParsedLine | undefined,
rowDepth: Depth, rowDepth: Depth,
header: ArrayHeaderInfo, header: ArrayHeaderInfo,
): void { ): void {
const nextLine = cursor.peek()
if ( if (
nextLine?.depth === rowDepth nextLine?.depth === rowDepth
&& !nextLine.content.startsWith(LIST_ITEM_PREFIX) && !nextLine.content.startsWith(LIST_ITEM_PREFIX)
@@ -62,8 +61,6 @@ export function validateNoBlankLinesInRange(
return return
// Find blank lines within the range // Find blank lines within the range
// Note: We don't filter by depth because ANY blank line between array items is an error,
// regardless of its indentation level
const firstBlank = blankLines.find( const firstBlank = blankLines.find(
blank => blank.lineNumber > startLine && blank.lineNumber < endLine, blank => blank.lineNumber > startLine && blank.lineNumber < endLine,
) )
@@ -75,6 +72,10 @@ export function validateNoBlankLinesInRange(
} }
} }
// #endregion
// #region Row classification helpers
/** /**
* Checks if a line is a data row (vs a key-value pair) in a tabular array. * Checks if a line is a data row (vs a key-value pair) in a tabular array.
*/ */
@@ -95,3 +96,5 @@ function isDataRow(content: string, delimiter: Delimiter): boolean {
// Colon before delimiter or no delimiter = key-value pair // Colon before delimiter or no delimiter = key-value pair
return false return false
} }
// #endregion

View File

@@ -1,55 +1,27 @@
import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types' import type { DecodeOptions, DecodeStreamOptions, EncodeOptions, JsonStreamEvent, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
import { DEFAULT_DELIMITER } from './constants' import { DEFAULT_DELIMITER } from './constants'
import { decodeValueFromLines } from './decode/decoders' import { decodeStream as decodeStreamCore, decodeStreamSync as decodeStreamSyncCore } from './decode/decoders'
import { buildValueFromEvents } from './decode/event-builder'
import { expandPathsSafe } from './decode/expand' import { expandPathsSafe } from './decode/expand'
import { LineCursor, toParsedLines } from './decode/scanner'
import { encodeJsonValue } from './encode/encoders' import { encodeJsonValue } from './encode/encoders'
import { normalizeValue } from './encode/normalize' import { normalizeValue } from './encode/normalize'
export { DEFAULT_DELIMITER, DELIMITERS } from './constants' export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
export type { export type {
DecodeOptions, DecodeOptions,
DecodeStreamOptions,
Delimiter, Delimiter,
DelimiterKey, DelimiterKey,
EncodeOptions, EncodeOptions,
JsonArray, JsonArray,
JsonObject, JsonObject,
JsonPrimitive, JsonPrimitive,
JsonStreamEvent,
JsonValue, JsonValue,
ResolvedDecodeOptions, ResolvedDecodeOptions,
ResolvedEncodeOptions, ResolvedEncodeOptions,
} from './types' } from './types'
/**
* Encodes a JavaScript value into TOON format as a sequence of lines.
*
* This function yields TOON lines one at a time without building the full string,
* making it suitable for streaming large outputs to files, HTTP responses, or process stdout.
*
* @param input - Any JavaScript value (objects, arrays, primitives)
* @param options - Optional encoding configuration
* @returns Iterable of TOON lines (without trailing newlines)
*
* @example
* ```ts
* // Stream to stdout
* for (const line of encodeLines({ name: 'Alice', age: 30 })) {
* console.log(line)
* }
*
* // Collect to array
* const lines = Array.from(encodeLines(data))
*
* // Equivalent to encode()
* const toonString = Array.from(encodeLines(data, options)).join('\n')
* ```
*/
export function encodeLines(input: unknown, options?: EncodeOptions): Iterable<string> {
const normalizedValue = normalizeValue(input)
const resolvedOptions = resolveOptions(options)
return encodeJsonValue(normalizedValue, resolvedOptions, 0)
}
/** /**
* Encodes a JavaScript value into TOON format string. * Encodes a JavaScript value into TOON format string.
* *
@@ -94,15 +66,69 @@ export function encode(input: unknown, options?: EncodeOptions): string {
* ``` * ```
*/ */
export function decode(input: string, options?: DecodeOptions): JsonValue { export function decode(input: string, options?: DecodeOptions): JsonValue {
const resolvedOptions = resolveDecodeOptions(options) const lines = input.split('\n')
const scanResult = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict) return decodeFromLines(lines, options)
}
if (scanResult.lines.length === 0) { /**
return {} * Encodes a JavaScript value into TOON format as a sequence of lines.
*
* This function yields TOON lines one at a time without building the full string,
* making it suitable for streaming large outputs to files, HTTP responses, or process stdout.
*
* @param input - Any JavaScript value (objects, arrays, primitives)
* @param options - Optional encoding configuration
* @returns Iterable of TOON lines (without trailing newlines)
*
* @example
* ```ts
* // Stream to stdout
* for (const line of encodeLines({ name: 'Alice', age: 30 })) {
* console.log(line)
* }
*
* // Collect to array
* const lines = Array.from(encodeLines(data))
*
* // Equivalent to encode()
* const toonString = Array.from(encodeLines(data, options)).join('\n')
* ```
*/
export function encodeLines(input: unknown, options?: EncodeOptions): Iterable<string> {
  // Normalize once up front — presumably coerces non-JSON inputs into plain
  // JSON values before encoding; TODO confirm exactly what normalizeValue handles.
  const normalizedValue = normalizeValue(input)
  const resolvedOptions = resolveOptions(options)
  // Depth 0: line emission starts at the document root; encodeJsonValue
  // yields lines lazily, so nothing is buffered here.
  return encodeJsonValue(normalizedValue, resolvedOptions, 0)
}
/**
* Decodes TOON format from pre-split lines into a JavaScript value.
*
* This is a convenience wrapper around the streaming decoder that builds
* the full value in memory. Useful when you already have lines as an array
* or iterable and want the standard decode behavior with path expansion support.
*
* @param lines - Iterable of TOON lines (without newlines)
* @param options - Optional decoding configuration (supports expandPaths)
* @returns Parsed JavaScript value (object, array, or primitive)
*
* @example
* ```ts
* const lines = ['name: Alice', 'age: 30']
* decodeFromLines(lines)
* // { name: 'Alice', age: 30 }
* ```
*/
export function decodeFromLines(lines: Iterable<string>, options?: DecodeOptions): JsonValue {
const resolvedOptions = resolveDecodeOptions(options)
// Use streaming decoder without expandPaths
const streamOptions: DecodeStreamOptions = {
indent: resolvedOptions.indent,
strict: resolvedOptions.strict,
} }
const cursor = new LineCursor(scanResult.lines, scanResult.blankLines) const events = decodeStreamSyncCore(lines, streamOptions)
const decodedValue = decodeValueFromLines(cursor, resolvedOptions) const decodedValue = buildValueFromEvents(events)
// Apply path expansion if enabled // Apply path expansion if enabled
if (resolvedOptions.expandPaths === 'safe') { if (resolvedOptions.expandPaths === 'safe') {
@@ -112,6 +138,72 @@ export function decode(input: string, options?: DecodeOptions): JsonValue {
return decodedValue return decodedValue
} }
/**
* Synchronously decodes TOON lines into a stream of JSON events.
*
* This function yields structured events (startObject, endObject, startArray, endArray,
* key, primitive) that represent the JSON data model without building the full value tree.
* Useful for streaming processing, custom transformations, or memory-efficient parsing.
*
* @remarks
* Path expansion (`expandPaths: 'safe'`) is not supported in streaming mode.
*
* @param lines - Iterable of TOON lines (without newlines)
* @param options - Optional decoding configuration (expandPaths not supported)
* @returns Iterable of JSON stream events
*
* @example
* ```ts
* const lines = ['name: Alice', 'age: 30']
* for (const event of decodeStreamSync(lines)) {
* console.log(event)
* // { type: 'startObject' }
* // { type: 'key', key: 'name' }
* // { type: 'primitive', value: 'Alice' }
* // ...
* }
* ```
*/
export function decodeStreamSync(lines: Iterable<string>, options?: DecodeStreamOptions): Iterable<JsonStreamEvent> {
  // Thin public wrapper: delegates directly to the core streaming decoder so
  // the implementation module stays internal to the package surface.
  return decodeStreamSyncCore(lines, options)
}
/**
* Asynchronously decodes TOON lines into a stream of JSON events.
*
* This function yields structured events (startObject, endObject, startArray, endArray,
* key, primitive) that represent the JSON data model without building the full value tree.
* Supports both sync and async iterables for maximum flexibility with file streams,
* network responses, or other async sources.
*
* @remarks
* Path expansion (`expandPaths: 'safe'`) is not supported in streaming mode.
*
* @param source - Async or sync iterable of TOON lines (without newlines)
* @param options - Optional decoding configuration (expandPaths not supported)
* @returns Async iterable of JSON stream events
*
* @example
* ```ts
* const fileStream = createReadStream('data.toon', 'utf-8')
* const lines = splitLines(fileStream) // Async iterable of lines
*
* for await (const event of decodeStream(lines)) {
* console.log(event)
* // { type: 'startObject' }
* // { type: 'key', key: 'name' }
* // { type: 'primitive', value: 'Alice' }
* // ...
* }
* ```
*/
export function decodeStream(
  source: AsyncIterable<string> | Iterable<string>,
  options?: DecodeStreamOptions,
): AsyncIterable<JsonStreamEvent> {
  // Thin public wrapper over the core async decoder. The union parameter type
  // means sync iterables (e.g. arrays of lines) are accepted alongside async
  // sources such as readline interfaces.
  return decodeStreamCore(source, options)
}
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions { function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
return { return {
indent: options?.indent ?? 2, indent: options?.indent ?? 2,

View File

@@ -69,6 +69,32 @@ export interface DecodeOptions {
export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>> export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>
/**
* Options for streaming decode operations.
*
* @remarks
* Path expansion is not supported in streaming mode.
*/
export interface DecodeStreamOptions extends Omit<DecodeOptions, 'expandPaths'> {
/**
* Path expansion is not supported in streaming decode.
* This option is explicitly omitted.
*/
expandPaths?: never
}
// #endregion
// #region Streaming decoder types
export type JsonStreamEvent
= | { type: 'startObject' }
| { type: 'endObject' }
| { type: 'startArray', length: number }
| { type: 'endArray' }
| { type: 'key', key: string, wasQuoted?: boolean }
| { type: 'primitive', value: JsonPrimitive }
// #endregion // #endregion
// #region Decoder parsing types // #region Decoder parsing types

View File

@@ -0,0 +1,343 @@
import { describe, expect, it } from 'vitest'
import { buildValueFromEvents } from '../src/decode/event-builder'
import { decode, decodeFromLines, decodeStreamSync } from '../src/index'
// Test suite for the streaming decode API: event emission (decodeStreamSync),
// value reconstruction from events (buildValueFromEvents), and the
// decodeFromLines convenience wrapper that combines the two.
describe('streaming decode', () => {
  // Pins the exact event sequence emitted for each TOON input shape.
  describe('decodeStreamSync', () => {
    it('should decode simple object', () => {
      const input = 'name: Alice\nage: 30'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))
      // Root-level key/value pairs are wrapped in a single object scope.
      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Alice' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 30 },
        { type: 'endObject' },
      ])
    })
    it('should decode nested object', () => {
      const input = 'user:\n name: Alice\n age: 30'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))
      // A nested object opens its own startObject/endObject scope under its key.
      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'user' },
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Alice' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 30 },
        { type: 'endObject' },
        { type: 'endObject' },
      ])
    })
    it('should decode inline primitive array', () => {
      const input = 'scores[3]: 95, 87, 92'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))
      // startArray carries the declared length from the [3] header.
      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'scores' },
        { type: 'startArray', length: 3 },
        { type: 'primitive', value: 95 },
        { type: 'primitive', value: 87 },
        { type: 'primitive', value: 92 },
        { type: 'endArray' },
        { type: 'endObject' },
      ])
    })
    it('should decode list array', () => {
      const input = 'items[2]:\n - Apple\n - Banana'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))
      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'items' },
        { type: 'startArray', length: 2 },
        { type: 'primitive', value: 'Apple' },
        { type: 'primitive', value: 'Banana' },
        { type: 'endArray' },
        { type: 'endObject' },
      ])
    })
    it('should decode tabular array', () => {
      const input = 'users[2]{name,age}:\n Alice, 30\n Bob, 25'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))
      // Each tabular row expands into a full object scope keyed by the
      // {name,age} header fields.
      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'users' },
        { type: 'startArray', length: 2 },
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Alice' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 30 },
        { type: 'endObject' },
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Bob' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 25 },
        { type: 'endObject' },
        { type: 'endArray' },
        { type: 'endObject' },
      ])
    })
    it('should decode root primitive', () => {
      const input = 'Hello World'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))
      // A bare scalar yields one primitive event with no object wrapper.
      expect(events).toEqual([
        { type: 'primitive', value: 'Hello World' },
      ])
    })
    it('should decode root array', () => {
      const input = '[2]:\n - Apple\n - Banana'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))
      expect(events).toEqual([
        { type: 'startArray', length: 2 },
        { type: 'primitive', value: 'Apple' },
        { type: 'primitive', value: 'Banana' },
        { type: 'endArray' },
      ])
    })
    it('should decode empty input as empty object', () => {
      const lines: string[] = []
      const events = Array.from(decodeStreamSync(lines))
      // Empty input still emits a balanced (empty) object scope.
      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'endObject' },
      ])
    })
    it('should throw on expandPaths option', () => {
      const input = 'name: Alice'
      const lines = input.split('\n')
      // DecodeStreamOptions types expandPaths as `never`; the `as any` cast
      // bypasses the compile-time rejection so the runtime guard is exercised.
      expect(() => Array.from(decodeStreamSync(lines, { expandPaths: 'safe' } as any)))
        .toThrow('expandPaths is not supported in streaming decode')
    })
    it('should enforce strict mode validation', () => {
      // Header declares [2] items but only one is present: strict mode rejects.
      const input = 'items[2]:\n - Apple'
      const lines = input.split('\n')
      expect(() => Array.from(decodeStreamSync(lines, { strict: true })))
        .toThrow()
    })
    it('should allow count mismatch in non-strict mode', () => {
      const input = 'items[2]:\n - Apple'
      const lines = input.split('\n')
      // Should not throw in non-strict mode
      const events = Array.from(decodeStreamSync(lines, { strict: false }))
      expect(events).toBeDefined()
      expect(events[0]).toEqual({ type: 'startObject' })
    })
  })
  // Folds hand-written event streams back into plain JSON values.
  describe('buildValueFromEvents', () => {
    it('should build object from events', () => {
      // `as const` narrows each `type` to its literal so the array satisfies
      // the JsonStreamEvent discriminated union.
      const events = [
        { type: 'startObject' as const },
        { type: 'key' as const, key: 'name' },
        { type: 'primitive' as const, value: 'Alice' },
        { type: 'key' as const, key: 'age' },
        { type: 'primitive' as const, value: 30 },
        { type: 'endObject' as const },
      ]
      const result = buildValueFromEvents(events)
      expect(result).toEqual({ name: 'Alice', age: 30 })
    })
    it('should build nested object from events', () => {
      const events = [
        { type: 'startObject' as const },
        { type: 'key' as const, key: 'user' },
        { type: 'startObject' as const },
        { type: 'key' as const, key: 'name' },
        { type: 'primitive' as const, value: 'Alice' },
        { type: 'endObject' as const },
        { type: 'endObject' as const },
      ]
      const result = buildValueFromEvents(events)
      expect(result).toEqual({ user: { name: 'Alice' } })
    })
    it('should build array from events', () => {
      const events = [
        { type: 'startArray' as const, length: 3 },
        { type: 'primitive' as const, value: 1 },
        { type: 'primitive' as const, value: 2 },
        { type: 'primitive' as const, value: 3 },
        { type: 'endArray' as const },
      ]
      const result = buildValueFromEvents(events)
      expect(result).toEqual([1, 2, 3])
    })
    it('should build primitive from events', () => {
      const events = [
        { type: 'primitive' as const, value: 'Hello' },
      ]
      const result = buildValueFromEvents(events)
      expect(result).toEqual('Hello')
    })
    it('should throw on incomplete event stream', () => {
      // An unclosed object scope must be reported rather than silently dropped.
      const events = [
        { type: 'startObject' as const },
        { type: 'key' as const, key: 'name' },
        // Missing primitive and endObject
      ]
      expect(() => buildValueFromEvents(events))
        .toThrow('Incomplete event stream')
    })
  })
  // The lines-based convenience wrapper must agree with string-based decode().
  describe('decodeFromLines', () => {
    it('should produce same result as decode', () => {
      const input = 'name: Alice\nage: 30\nscores[3]: 95, 87, 92'
      const lines = input.split('\n')
      const fromLines = decodeFromLines(lines)
      const fromString = decode(input)
      expect(fromLines).toEqual(fromString)
    })
    it('should support expandPaths option', () => {
      // Unlike the pure streaming API, decodeFromLines supports path expansion.
      const input = 'user.name: Alice\nuser.age: 30'
      const lines = input.split('\n')
      const result = decodeFromLines(lines, { expandPaths: 'safe' })
      expect(result).toEqual({
        user: {
          name: 'Alice',
          age: 30,
        },
      })
    })
    it('should handle complex nested structures', () => {
      const input = [
        'users[2]:',
        ' - name: Alice',
        ' scores[3]: 95, 87, 92',
        ' - name: Bob',
        ' scores[3]: 88, 91, 85',
      ].join('\n')
      const fromLines = decodeFromLines(input.split('\n'))
      const fromString = decode(input)
      // Checked both against decode() and against the expected literal value.
      expect(fromLines).toEqual(fromString)
      expect(fromLines).toEqual({
        users: [
          { name: 'Alice', scores: [95, 87, 92] },
          { name: 'Bob', scores: [88, 91, 85] },
        ],
      })
    })
    it('should handle tabular arrays', () => {
      const input = [
        'users[3]{name,age,city}:',
        ' Alice, 30, NYC',
        ' Bob, 25, LA',
        ' Charlie, 35, SF',
      ].join('\n')
      const fromLines = decodeFromLines(input.split('\n'))
      const fromString = decode(input)
      expect(fromLines).toEqual(fromString)
      expect(fromLines).toEqual({
        users: [
          { name: 'Alice', age: 30, city: 'NYC' },
          { name: 'Bob', age: 25, city: 'LA' },
          { name: 'Charlie', age: 35, city: 'SF' },
        ],
      })
    })
  })
  describe('streaming equivalence', () => {
    // Test that streaming produces same results as non-streaming for various inputs
    const testCases = [
      {
        name: 'simple object',
        input: 'name: Alice\nage: 30',
      },
      {
        name: 'nested objects',
        input: 'user:\n profile:\n name: Alice\n age: 30',
      },
      {
        name: 'mixed structures',
        input: 'name: Alice\nscores[3]: 95, 87, 92\naddress:\n city: NYC\n zip: 10001',
      },
      {
        name: 'list array with objects',
        input: 'users[2]:\n - name: Alice\n age: 30\n - name: Bob\n age: 25',
      },
      {
        name: 'root primitive number',
        input: '42',
      },
      {
        name: 'root primitive string',
        input: 'Hello World',
      },
      {
        name: 'root primitive boolean',
        input: 'true',
      },
      {
        name: 'root primitive null',
        input: 'null',
      },
    ]
    // One generated test per case: streaming and non-streaming must agree.
    for (const testCase of testCases) {
      it(`should match decode() for: ${testCase.name}`, () => {
        const lines = testCase.input.split('\n')
        const streamResult = decodeFromLines(lines)
        const regularResult = decode(testCase.input)
        expect(streamResult).toEqual(regularResult)
      })
    }
  })
})