Mirror of https://github.com/voson-wang/toon.git (synced 2026-01-29 23:34:10 +08:00)

feat: streaming decode functionality with event-based parsing (closes #131)

README.md | 40
@@ -777,6 +777,46 @@ for (const line of encodeLines(largeData)) {
 }
 ```
 
+**Streaming decode:**
+
+```ts
+import { decodeFromLines, decodeStreamSync } from '@toon-format/toon'
+
+// 1. Lines → value (build full JSON value)
+const value = decodeFromLines([
+  'users[2]{id,name}:',
+  '  1,Alice',
+  '  2,Bob',
+])
+// { users: [{ id: 1, name: 'Alice' }, { id: 2, name: 'Bob' }] }
+
+// 2. Lines → events (for custom streaming consumers)
+const lines = [
+  'users[2]{id,name}:',
+  '  1,Alice',
+  '  2,Bob',
+]
+for (const event of decodeStreamSync(lines)) {
+  // { type: 'startObject' }, { type: 'key', key: 'users' }, ...
+}
+```
+
+**Async streaming decode:**
+
+```ts
+// 3. Async streaming from files or network
+import { createReadStream } from 'node:fs'
+import { createInterface } from 'node:readline'
+import { decodeStream } from '@toon-format/toon'
+
+const fileStream = createReadStream('data.toon', 'utf-8')
+const rl = createInterface({ input: fileStream })
+
+for await (const event of decodeStream(rl)) {
+  // Process events as they arrive
+}
+```
+
 ## Playgrounds
 
 Experiment with TOON format interactively using these community-built tools for token comparison, format conversion, and validation:
@@ -108,19 +108,25 @@ cat data.toon | toon --decode
 
 Both encoding and decoding operations use streaming output, writing incrementally without building the full output string in memory. This makes the CLI efficient for large datasets without requiring additional configuration.
 
-**JSON → TOON (Encode)**
-- Streams TOON lines to output
-- No full TOON string in memory
-
-**TOON → JSON (Decode)**
-- Streams JSON tokens to output
-- No full JSON string in memory
+**JSON → TOON (Encode)**:
+
+- Streams TOON lines to output.
+- No full TOON string in memory.
+
+**TOON → JSON (Decode)**:
+
+- Uses the same event-based streaming decoder as the `decodeStream` API in `@toon-format/toon`.
+- Streams JSON tokens to output.
+- No full JSON string in memory.
+- When `--expand-paths safe` is enabled, falls back to non-streaming decode internally to apply deep-merge expansion before writing JSON.
+
+Process large files with minimal memory usage:
 
 ```bash
-# Encode large JSON file with minimal memory usage
+# Encode large JSON file
 toon huge-dataset.json -o output.toon
 
-# Decode large TOON file with minimal memory usage
+# Decode large TOON file
 toon huge-dataset.toon -o output.json
 
 # Process millions of records efficiently via stdin
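For orientation, the deep-merge expansion that the `--expand-paths safe` bullet above falls back to is the same `expandPaths` behavior exposed by the library API; a minimal sketch of what it does (keys and values here are illustrative only):

```ts
import { decode } from '@toon-format/toon'

// 'safe' path expansion reconstructs dotted keys into nested objects
const toon = 'user.name: Alice\nuser.age: 30'
const value = decode(toon, { expandPaths: 'safe' })
// { user: { name: 'Alice', age: 30 } }
```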
@@ -237,3 +237,5 @@ Round-tripping is lossless: `decode(encode(x))` always equals `x` (after normalization)
 ## Where to Go Next
 
 Now that you've seen your first TOON document, read the [Format Overview](/guide/format-overview) for complete syntax details (objects, arrays, quoting rules, key folding), then explore [Using TOON with LLMs](/guide/llm-prompts) to see how to use it effectively in prompts. For implementation details, check the [API reference](/reference/api) (TypeScript) or the [specification](/reference/spec) (language-agnostic normative rules).
+
+For large datasets or streaming use-cases, see `encodeLines`, `decodeFromLines`, and `decodeStream` in the [API reference](/reference/api).
@@ -118,6 +118,31 @@ toon large-dataset.json --output output.toon
 
 This streaming approach prevents out-of-memory errors when preparing large context windows for LLMs. For complete details on `encodeLines()`, see the [API reference](/reference/api#encodelines).
 
+**Consuming streaming LLM outputs:** If your LLM client exposes streaming text and you buffer by lines, you can decode TOON incrementally:
+
+```ts
+import { decodeFromLines } from '@toon-format/toon'
+
+// Buffer streaming response into lines
+const lines: string[] = []
+let buffer = ''
+
+for await (const chunk of modelStream) {
+  buffer += chunk
+  let index: number
+
+  while ((index = buffer.indexOf('\n')) !== -1) {
+    lines.push(buffer.slice(0, index))
+    buffer = buffer.slice(index + 1)
+  }
+}
+
+// Decode buffered lines
+const data = decodeFromLines(lines)
+```
+
+For streaming decode APIs, see [`decodeFromLines()`](/reference/api#decodeFromLines-lines-options) and [`decodeStream()`](/reference/api#decodeStream-source-options).
+
 ## Tips and Pitfalls
 
 **Show, don't describe.** Don't explain TOON syntax in detail – just show an example. Models learn the pattern from context. A simple code block with 2-5 rows is more effective than paragraphs of explanation.
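One caveat about the line-buffering loop in the example added above: if the model's final chunk does not end with a newline, the remainder left in `buffer` is never pushed into `lines`. A minimal flush after the loop (mirroring what the CLI's `readLinesFromSource` helper introduced later in this commit does) would be:

```ts
// After the for-await loop: flush any trailing partial line
if (buffer.length > 0) {
  lines.push(buffer)
}
```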
@@ -300,6 +300,227 @@ decode(toon, { expandPaths: 'safe', strict: false })
 ```
 :::
 
+## `decodeFromLines(lines, options?)`
+
+Decodes TOON format from pre-split lines into a JavaScript value. This is a streaming-friendly wrapper around the event-based decoder that builds the full value in memory.
+
+Useful when you already have lines as an array or iterable (e.g., from file streams, readline interfaces, or network responses) and want the standard decode behavior with path expansion support.
+
+### Parameters
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `lines` | `Iterable<string>` | Iterable of TOON lines (without trailing newlines) |
+| `options` | `DecodeOptions?` | Optional decoding configuration (see below) |
+
+### Options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `indent` | `number` | `2` | Expected number of spaces per indentation level |
+| `strict` | `boolean` | `true` | Enable strict validation (array counts, indentation, delimiter consistency) |
+| `expandPaths` | `'off'` \| `'safe'` | `'off'` | Enable path expansion to reconstruct dotted keys into nested objects |
+
+### Return Value
+
+Returns a `JsonValue` (the parsed JavaScript value: object, array, or primitive).
+
+### Example
+
+**Basic usage with arrays:**
+
+```ts
+import { decodeFromLines } from '@toon-format/toon'
+
+const lines = ['name: Alice', 'age: 30']
+const value = decodeFromLines(lines)
+// { name: 'Alice', age: 30 }
+```
+
+**Collecting lines from Node.js readline:**
+
+```ts
+import { createReadStream } from 'node:fs'
+import { createInterface } from 'node:readline'
+import { decodeFromLines } from '@toon-format/toon'
+
+const rl = createInterface({
+  input: createReadStream('data.toon'),
+  crlfDelay: Infinity,
+})
+
+// decodeFromLines takes a synchronous iterable, so collect the lines first
+// (use decodeStream to consume an async source like readline directly)
+const lines: string[] = []
+for await (const line of rl) {
+  lines.push(line)
+}
+
+const value = decodeFromLines(lines)
+console.log(value)
+```
+
+**With path expansion:**
+
+```ts
+const lines = ['user.name: Alice', 'user.age: 30']
+const value = decodeFromLines(lines, { expandPaths: 'safe' })
+// { user: { name: 'Alice', age: 30 } }
+```
+
+## `decodeStreamSync(lines, options?)`
+
+Synchronously decodes TOON lines into a stream of JSON events. This function yields structured events that represent the JSON data model without building the full value tree.
+
+Useful for streaming processing, custom transformations, or memory-efficient parsing of large datasets where you don't need the full value in memory.
+
+::: info Event Streaming
+This is a low-level API that returns individual parse events. For most use cases, [`decodeFromLines()`](#decodeFromLines-lines-options) or [`decode()`](#decode-input-options) are more convenient.
+
+Path expansion (`expandPaths: 'safe'`) is **not supported** in streaming mode since it requires the full value tree.
+:::
+
+### Parameters
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `lines` | `Iterable<string>` | Iterable of TOON lines (without trailing newlines) |
+| `options` | `DecodeStreamOptions?` | Optional streaming decoding configuration (see below) |
+
+### Options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `indent` | `number` | `2` | Expected number of spaces per indentation level |
+| `strict` | `boolean` | `true` | Enable strict validation (array counts, indentation, delimiter consistency) |
+
+### Return Value
+
+Returns an `Iterable<JsonStreamEvent>` that yields structured events.
+
+### Event Types
+
+Events represent the structure of the JSON data model:
+
+```ts
+type JsonStreamEvent
+  = | { type: 'startObject' }
+    | { type: 'endObject' }
+    | { type: 'startArray' }
+    | { type: 'endArray' }
+    | { type: 'key', key: string }
+    | { type: 'primitive', value: JsonPrimitive }
+
+type JsonPrimitive = string | number | boolean | null
+```
+
+### Example
+
+**Basic event streaming:**
+
+```ts
+import { decodeStreamSync } from '@toon-format/toon'
+
+const lines = ['name: Alice', 'age: 30']
+
+for (const event of decodeStreamSync(lines)) {
+  console.log(event)
+}
+
+// Output:
+// { type: 'startObject' }
+// { type: 'key', key: 'name' }
+// { type: 'primitive', value: 'Alice' }
+// { type: 'key', key: 'age' }
+// { type: 'primitive', value: 30 }
+// { type: 'endObject' }
+```
+
+**Custom processing:**
+
+```ts
+import { decodeStreamSync } from '@toon-format/toon'
+
+const lines = ['users[2]{id,name}:', '  1,Alice', '  2,Bob']
+let userCount = 0
+
+for (const event of decodeStreamSync(lines)) {
+  if (event.type === 'endObject' && userCount < 2) {
+    userCount++
+    console.log(`Processed user ${userCount}`)
+  }
+}
+```
+
+## `decodeStream(source, options?)`
+
+Asynchronously decodes TOON lines into a stream of JSON events. This is the async version of [`decodeStreamSync()`](#decodeStreamSync-lines-options), supporting both synchronous and asynchronous iterables.
+
+Useful for processing file streams, network responses, or other async sources where you want to handle data incrementally as it arrives.
+
+### Parameters
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `source` | `AsyncIterable<string>` \| `Iterable<string>` | Async or sync iterable of TOON lines (without trailing newlines) |
+| `options` | `DecodeStreamOptions?` | Optional streaming decoding configuration (see below) |
+
+### Options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `indent` | `number` | `2` | Expected number of spaces per indentation level |
+| `strict` | `boolean` | `true` | Enable strict validation (array counts, indentation, delimiter consistency) |
+
+### Return Value
+
+Returns an `AsyncIterable<JsonStreamEvent>` that yields structured events asynchronously.
+
+### Example
+
+**Streaming from file:**
+
+```ts
+import { createReadStream } from 'node:fs'
+import { createInterface } from 'node:readline'
+import { decodeStream } from '@toon-format/toon'
+
+const fileStream = createReadStream('data.toon', 'utf-8')
+const rl = createInterface({ input: fileStream, crlfDelay: Infinity })
+
+for await (const event of decodeStream(rl)) {
+  console.log(event)
+  // Process events as they arrive
+}
+```
+
+**Processing events incrementally:**
+
+```ts
+import { decodeStream } from '@toon-format/toon'
+
+const lines = getAsyncLineSource() // AsyncIterable<string>
+let expectIdValue = false
+
+for await (const event of decodeStream(lines, { strict: true })) {
+  if (event.type === 'key' && event.key === 'id') {
+    // The next primitive event carries the id value
+    expectIdValue = true
+  }
+  else if (expectIdValue && event.type === 'primitive') {
+    console.log('Found ID:', event.value)
+    expectIdValue = false
+  }
+}
+```
+
+**Auto-detection of sync/async sources:**
+
+```ts
+// Works with sync iterables
+const syncLines = ['name: Alice', 'age: 30']
+for await (const event of decodeStream(syncLines)) {
+  console.log(event)
+}
+
+// Works with async iterables
+const asyncLines = readLinesFromNetwork()
+for await (const event of decodeStream(asyncLines)) {
+  console.log(event)
+}
+```
+
 ## Round-Trip Compatibility
 
 TOON provides lossless round-trips after normalization:
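As an aside, the lossless round-trip stated in the context line above can be exercised directly with the public `encode`/`decode` API; a minimal sketch:

```ts
import { decode, encode } from '@toon-format/toon'

const original = { users: [{ id: 1, name: 'Alice' }, { id: 2, name: 'Bob' }] }
const roundTripped = decode(encode(original))
// Deep-equal to `original` (after normalization)
```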
@@ -1,10 +1,15 @@
 // @ts-check
 import antfu from '@antfu/eslint-config'
 
-export default antfu().append({
+export default antfu({
+  rules: {
+    'no-cond-assign': 'off',
+  },
+}).append({
   files: ['README.md', 'SPEC.md', '**/docs/**/*'],
   rules: {
-    'yaml/quotes': 'off',
+    'import/no-duplicates': 'off',
     'style/no-tabs': 'off',
+    'yaml/quotes': 'off',
   },
 })
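The new root-level `'no-cond-assign': 'off'` entry is presumably there to permit the assignment-in-condition pattern used by the line-splitting loops introduced elsewhere in this commit; a self-contained sketch of that pattern (the helper name here is hypothetical):

```ts
// Splits complete lines out of a chunked text buffer; `no-cond-assign` would flag the while condition
function splitCompleteLines(buffer: string): { lines: string[], rest: string } {
  const lines: string[] = []
  let index: number
  while ((index = buffer.indexOf('\n')) !== -1) {
    lines.push(buffer.slice(0, index))
    buffer = buffer.slice(index + 1)
  }
  return { lines, rest: buffer }
}
```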
@@ -134,8 +134,9 @@ cat million-records.toon | toon --decode > output.json
 
 **Memory efficiency:**
 - **Encode (JSON → TOON)**: Streams TOON lines to output without full string in memory
-- **Decode (TOON → JSON)**: Streams JSON tokens to output without full string in memory
+- **Decode (TOON → JSON)**: Uses the same event-based streaming decoder as the `decodeStream` API in `@toon-format/toon`, streaming JSON tokens to output without full string in memory
 - Peak memory usage scales with data depth, not total size
+- When `--expand-paths safe` is enabled, decode falls back to non-streaming mode internally to apply deep-merge expansion before writing JSON
 
 > [!NOTE]
 > When using `--stats` with encode, the full output string is kept in memory for token counting. Omit `--stats` for maximum memory efficiency with very large datasets.
@@ -1,14 +1,15 @@
 import type { FileHandle } from 'node:fs/promises'
-import type { DecodeOptions, EncodeOptions } from '../../toon/src'
+import type { DecodeOptions, DecodeStreamOptions, EncodeOptions } from '../../toon/src'
 import type { InputSource } from './types'
 import * as fsp from 'node:fs/promises'
 import * as path from 'node:path'
 import process from 'node:process'
 import { consola } from 'consola'
 import { estimateTokenCount } from 'tokenx'
-import { decode, encode, encodeLines } from '../../toon/src'
+import { decode, decodeStream, encode, encodeLines } from '../../toon/src'
+import { jsonStreamFromEvents } from './json-from-events'
 import { jsonStringifyLines } from './json-stringify-stream'
-import { formatInputLabel, readInput } from './utils'
+import { formatInputLabel, readInput, readLinesFromSource } from './utils'
 
 export async function encodeToToon(config: {
   input: InputSource

@@ -80,6 +81,8 @@ export async function decodeToJson(config: {
   strict: NonNullable<DecodeOptions['strict']>
   expandPaths?: NonNullable<DecodeOptions['expandPaths']>
 }): Promise<void> {
+  // Path expansion requires full value in memory, so use non-streaming path
+  if (config.expandPaths === 'safe') {
   const toonContent = await readInput(config.input)
 
   let data: unknown

@@ -96,6 +99,25 @@ export async function decodeToJson(config: {
   }
 
   await writeStreamingJson(jsonStringifyLines(data, config.indent), config.output)
+  }
+  else {
+    try {
+      const lineSource = readLinesFromSource(config.input)
+
+      const decodeStreamOptions: DecodeStreamOptions = {
+        indent: config.indent,
+        strict: config.strict,
+      }
+
+      const events = decodeStream(lineSource, decodeStreamOptions)
+      const jsonChunks = jsonStreamFromEvents(events, config.indent)
+
+      await writeStreamingJson(jsonChunks, config.output)
+    }
+    catch (error) {
+      throw new Error(`Failed to decode TOON: ${error instanceof Error ? error.message : String(error)}`)
+    }
+  }
+
   if (config.output) {
     const relativeInputPath = formatInputLabel(config.input)

@@ -109,7 +131,7 @@
  * Chunks are written one at a time without building the full string in memory.
  */
 async function writeStreamingJson(
-  chunks: Iterable<string>,
+  chunks: AsyncIterable<string> | Iterable<string>,
   outputPath?: string,
 ): Promise<void> {
   // Stream to file using fs/promises API

@@ -119,7 +141,7 @@ async function writeStreamingJson(
   try {
     fileHandle = await fsp.open(outputPath, 'w')
 
-    for (const chunk of chunks) {
+    for await (const chunk of chunks) {
       await fileHandle.write(chunk)
     }
   }

@@ -129,7 +151,7 @@ async function writeStreamingJson(
   }
   // Stream to stdout
   else {
-    for (const chunk of chunks) {
+    for await (const chunk of chunks) {
       process.stdout.write(chunk)
     }
   }
packages/cli/src/json-from-events.ts (new file, 217 lines added)
@@ -0,0 +1,217 @@

import type { JsonStreamEvent } from '../../toon/src/types'

/**
 * Context for tracking JSON structure state during event streaming.
 */
type JsonContext
  = | { type: 'object', needsComma: boolean, expectValue: boolean }
    | { type: 'array', needsComma: boolean }

/**
 * Converts a stream of `JsonStreamEvent` into formatted JSON string chunks.
 *
 * Similar to `jsonStringifyLines` but driven by events instead of a value tree.
 * Useful for streaming TOON decode directly to JSON output without building
 * the full data structure in memory.
 *
 * @param events - Async iterable of JSON stream events
 * @param indent - Number of spaces for indentation (0 = compact, >0 = pretty)
 * @returns Async iterable of JSON string chunks
 *
 * @example
 * ```ts
 * const lines = readLinesFromSource(input)
 * const events = decodeStream(lines)
 * for await (const chunk of jsonStreamFromEvents(events, 2)) {
 *   process.stdout.write(chunk)
 * }
 * ```
 */
export async function* jsonStreamFromEvents(
  events: AsyncIterable<JsonStreamEvent>,
  indent: number = 2,
): AsyncIterable<string> {
  const stack: JsonContext[] = []
  let depth = 0

  for await (const event of events) {
    const parent = stack.length > 0 ? stack[stack.length - 1] : undefined

    switch (event.type) {
      case 'startObject': {
        // Emit comma if needed (inside array or after previous object field value)
        if (parent) {
          if (parent.type === 'array' && parent.needsComma) {
            yield ','
          }
          else if (parent.type === 'object' && !parent.expectValue) {
            // Object field value already emitted, this is a nested object after a key
            // The comma is handled by the key event
          }
        }

        // Emit newline and indent for pretty printing
        if (indent > 0 && parent) {
          if (parent.type === 'array') {
            yield '\n'
            yield ' '.repeat(depth * indent)
          }
        }

        yield '{'
        stack.push({ type: 'object', needsComma: false, expectValue: false })
        depth++
        break
      }

      case 'endObject': {
        const context = stack.pop()
        if (!context || context.type !== 'object') {
          throw new Error('Mismatched endObject event')
        }

        depth--

        // Emit newline and indent for closing brace (pretty print)
        if (indent > 0 && context.needsComma) {
          yield '\n'
          yield ' '.repeat(depth * indent)
        }

        yield '}'

        // Mark parent as needing comma for next item
        const newParent = stack.length > 0 ? stack[stack.length - 1] : undefined
        if (newParent) {
          if (newParent.type === 'object') {
            newParent.expectValue = false
            newParent.needsComma = true
          }
          else if (newParent.type === 'array') {
            newParent.needsComma = true
          }
        }
        break
      }

      case 'startArray': {
        // Emit comma if needed
        if (parent) {
          if (parent.type === 'array' && parent.needsComma) {
            yield ','
          }
        }

        // Emit newline and indent for pretty printing
        if (indent > 0 && parent) {
          if (parent.type === 'array') {
            yield '\n'
            yield ' '.repeat(depth * indent)
          }
        }

        yield '['
        stack.push({
          type: 'array',
          needsComma: false,
        })
        depth++
        break
      }

      case 'endArray': {
        const context = stack.pop()
        if (!context || context.type !== 'array') {
          throw new Error('Mismatched endArray event')
        }

        depth--

        // Emit newline and indent for closing bracket (pretty print)
        if (indent > 0 && context.needsComma) {
          yield '\n'
          yield ' '.repeat(depth * indent)
        }

        yield ']'

        // Mark parent as needing comma for next item
        const newParent = stack.length > 0 ? stack[stack.length - 1] : undefined
        if (newParent) {
          if (newParent.type === 'object') {
            newParent.expectValue = false
            newParent.needsComma = true
          }
          else if (newParent.type === 'array') {
            newParent.needsComma = true
          }
        }
        break
      }

      case 'key': {
        if (!parent || parent.type !== 'object') {
          throw new Error('Key event outside of object context')
        }

        // Emit comma before this field if needed
        if (parent.needsComma) {
          yield ','
        }

        // Emit newline and indent (pretty print)
        if (indent > 0) {
          yield '\n'
          yield ' '.repeat(depth * indent)
        }

        // Emit key
        yield JSON.stringify(event.key)
        yield indent > 0 ? ': ' : ':'

        parent.expectValue = true
        parent.needsComma = true
        break
      }

      case 'primitive': {
        // Emit comma if needed
        if (parent) {
          if (parent.type === 'array' && parent.needsComma) {
            yield ','
          }
          else if (parent.type === 'object' && !parent.expectValue) {
            // This shouldn't happen in well-formed events
            throw new Error('Primitive event in object without preceding key')
          }
        }

        // Emit newline and indent for array items (pretty print)
        if (indent > 0 && parent && parent.type === 'array') {
          yield '\n'
          yield ' '.repeat(depth * indent)
        }

        // Emit primitive value
        yield JSON.stringify(event.value)

        // Update parent context
        if (parent) {
          if (parent.type === 'object') {
            parent.expectValue = false
            // needsComma already true from key event
          }
          else if (parent.type === 'array') {
            parent.needsComma = true
          }
        }
        break
      }
    }
  }

  // Ensure stack is empty
  if (stack.length !== 0) {
    throw new Error('Incomplete event stream: unclosed objects or arrays')
  }
}
@@ -1,4 +1,5 @@
 import type { InputSource } from './types'
+import { createReadStream } from 'node:fs'
 import * as fsp from 'node:fs/promises'
 import * as path from 'node:path'
 import process from 'node:process'

@@ -77,3 +78,32 @@ function readFromStdin(): Promise<string> {
     stdin.resume()
   })
 }
+
+export async function* readLinesFromSource(source: InputSource): AsyncIterable<string> {
+  const stream = source.type === 'stdin'
+    ? process.stdin
+    : createReadStream(source.path, { encoding: 'utf-8' })
+
+  // Explicitly set encoding for stdin
+  if (source.type === 'stdin') {
+    stream.setEncoding('utf-8')
+  }
+
+  let buffer = ''
+
+  for await (const chunk of stream) {
+    buffer += chunk
+    let index: number
+
+    while ((index = buffer.indexOf('\n')) !== -1) {
+      const line = buffer.slice(0, index)
+      buffer = buffer.slice(index + 1)
+      yield line
+    }
+  }
+
+  // Emit last line if buffer is not empty and doesn't end with newline
+  if (buffer.length > 0) {
+    yield buffer
+  }
+}
(One file's diff is suppressed because it is too large.)
packages/toon/src/decode/event-builder.ts (new file, 334 lines added)
@@ -0,0 +1,334 @@

import type { JsonObject, JsonStreamEvent, JsonValue } from '../types'
import { QUOTED_KEY_MARKER } from './expand'

// #region Build context types

/**
 * Stack context for building JSON values from events.
 */
type BuildContext
  = | { type: 'object', obj: JsonObject, currentKey?: string, quotedKeys: Set<string> }
    | { type: 'array', arr: JsonValue[] }

// #endregion

// #region Synchronous AST builder

export function buildValueFromEvents(events: Iterable<JsonStreamEvent>): JsonValue {
  const stack: BuildContext[] = []
  let root: JsonValue | undefined

  for (const event of events) {
    switch (event.type) {
      case 'startObject': {
        const obj: JsonObject = {}
        const quotedKeys = new Set<string>()

        if (stack.length === 0) {
          // Root object
          stack.push({ type: 'object', obj, quotedKeys })
        }
        else {
          const parent = stack[stack.length - 1]!
          if (parent.type === 'object') {
            if (parent.currentKey === undefined) {
              throw new Error('Object startObject event without preceding key')
            }

            parent.obj[parent.currentKey] = obj
            parent.currentKey = undefined
          }
          else if (parent.type === 'array') {
            parent.arr.push(obj)
          }

          stack.push({ type: 'object', obj, quotedKeys })
        }

        break
      }

      case 'endObject': {
        if (stack.length === 0) {
          throw new Error('Unexpected endObject event')
        }

        const context = stack.pop()!
        if (context.type !== 'object') {
          throw new Error('Mismatched endObject event')
        }

        // Attach quoted keys metadata if any keys were quoted
        if (context.quotedKeys.size > 0) {
          Object.defineProperty(context.obj, QUOTED_KEY_MARKER, {
            value: context.quotedKeys,
            enumerable: false,
            writable: false,
            configurable: false,
          })
        }

        if (stack.length === 0) {
          root = context.obj
        }

        break
      }

      case 'startArray': {
        const arr: JsonValue[] = []

        if (stack.length === 0) {
          // Root array
          stack.push({ type: 'array', arr })
        }
        else {
          const parent = stack[stack.length - 1]!
          if (parent.type === 'object') {
            if (parent.currentKey === undefined) {
              throw new Error('Array startArray event without preceding key')
            }
            parent.obj[parent.currentKey] = arr
            parent.currentKey = undefined
          }
          else if (parent.type === 'array') {
            parent.arr.push(arr)
          }

          stack.push({ type: 'array', arr })
        }

        break
      }

      case 'endArray': {
        if (stack.length === 0) {
          throw new Error('Unexpected endArray event')
        }

        const context = stack.pop()!
        if (context.type !== 'array') {
          throw new Error('Mismatched endArray event')
        }

        if (stack.length === 0) {
          root = context.arr
        }

        break
      }

      case 'key': {
        if (stack.length === 0) {
          throw new Error('Key event outside of object context')
        }

        const parent = stack[stack.length - 1]!
        if (parent.type !== 'object') {
          throw new Error('Key event in non-object context')
        }

        parent.currentKey = event.key

        // Track quoted keys for path expansion
        if (event.wasQuoted) {
          parent.quotedKeys.add(event.key)
        }

        break
      }

      case 'primitive': {
        if (stack.length === 0) {
          // Root primitive
          root = event.value
        }
        else {
          const parent = stack[stack.length - 1]!
          if (parent.type === 'object') {
            if (parent.currentKey === undefined) {
              throw new Error('Primitive event without preceding key in object')
            }
            parent.obj[parent.currentKey] = event.value
            parent.currentKey = undefined
          }
          else if (parent.type === 'array') {
            parent.arr.push(event.value)
          }
        }

        break
      }
    }
  }

  if (stack.length !== 0) {
    throw new Error('Incomplete event stream: stack not empty at end')
  }

  if (root === undefined) {
    throw new Error('No root value built from events')
  }

  return root
}

// #endregion

// #region Asynchronous AST builder

export async function buildValueFromEventsAsync(events: AsyncIterable<JsonStreamEvent>): Promise<JsonValue> {
  const stack: BuildContext[] = []
  let root: JsonValue | undefined

  for await (const event of events) {
    switch (event.type) {
      case 'startObject': {
        const obj: JsonObject = {}
        const quotedKeys = new Set<string>()

        if (stack.length === 0) {
          stack.push({ type: 'object', obj, quotedKeys })
        }
        else {
          const parent = stack[stack.length - 1]!
          if (parent.type === 'object') {
            if (parent.currentKey === undefined) {
              throw new Error('Object startObject event without preceding key')
            }
            parent.obj[parent.currentKey] = obj
            parent.currentKey = undefined
          }
          else if (parent.type === 'array') {
            parent.arr.push(obj)
          }

          stack.push({ type: 'object', obj, quotedKeys })
        }

        break
      }

      case 'endObject': {
        if (stack.length === 0) {
          throw new Error('Unexpected endObject event')
        }

        const context = stack.pop()!
        if (context.type !== 'object') {
          throw new Error('Mismatched endObject event')
        }

        // Attach quoted keys metadata if any keys were quoted
        if (context.quotedKeys.size > 0) {
          Object.defineProperty(context.obj, QUOTED_KEY_MARKER, {
            value: context.quotedKeys,
            enumerable: false,
            writable: false,
            configurable: false,
          })
        }

        if (stack.length === 0) {
          root = context.obj
        }

        break
      }

      case 'startArray': {
        const arr: JsonValue[] = []
        if (stack.length === 0) {
          stack.push({ type: 'array', arr })
        }
        else {
          const parent = stack[stack.length - 1]!
          if (parent.type === 'object') {
            if (parent.currentKey === undefined) {
              throw new Error('Array startArray event without preceding key')
            }
            parent.obj[parent.currentKey] = arr
            parent.currentKey = undefined
          }
          else if (parent.type === 'array') {
            parent.arr.push(arr)
          }

          stack.push({ type: 'array', arr })
        }

        break
      }

      case 'endArray': {
        if (stack.length === 0) {
          throw new Error('Unexpected endArray event')
        }

        const context = stack.pop()!
        if (context.type !== 'array') {
          throw new Error('Mismatched endArray event')
        }

        if (stack.length === 0) {
          root = context.arr
        }

        break
      }

      case 'key': {
        if (stack.length === 0) {
          throw new Error('Key event outside of object context')
        }

        const parent = stack[stack.length - 1]!
        if (parent.type !== 'object') {
          throw new Error('Key event in non-object context')
        }

        parent.currentKey = event.key

        // Track quoted keys for path expansion
        if (event.wasQuoted) {
          parent.quotedKeys.add(event.key)
        }

        break
      }

      case 'primitive': {
        if (stack.length === 0) {
          root = event.value
        }
        else {
          const parent = stack[stack.length - 1]!
          if (parent.type === 'object') {
            if (parent.currentKey === undefined) {
              throw new Error('Primitive event without preceding key in object')
            }
            parent.obj[parent.currentKey] = event.value
            parent.currentKey = undefined
          }
          else if (parent.type === 'array') {
            parent.arr.push(event.value)
          }
        }

        break
      }
    }
  }

  if (stack.length !== 0) {
    throw new Error('Incomplete event stream: stack not empty at end')
  }

  if (root === undefined) {
    throw new Error('No root value built from events')
  }

  return root
}

// #endregion
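For orientation, a minimal usage sketch of the synchronous builder above with a hand-written event sequence (the relative import paths are assumed, and the optional `wasQuoted` flag on key events is omitted):

```ts
import type { JsonStreamEvent } from '../types'
import { buildValueFromEvents } from './event-builder'

const events: JsonStreamEvent[] = [
  { type: 'startObject' },
  { type: 'key', key: 'name' },
  { type: 'primitive', value: 'Alice' },
  { type: 'key', key: 'age' },
  { type: 'primitive', value: 30 },
  { type: 'endObject' },
]

const value = buildValueFromEvents(events)
// { name: 'Alice', age: 30 }
```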
@@ -12,7 +12,7 @@ import { isIdentifierSegment } from '../shared/validation'
 export const QUOTED_KEY_MARKER: unique symbol = Symbol('quotedKey')
 
 /**
- * Type for objects that may have quoted key metadata attached.
+ * Objects that may have quoted key metadata attached.
  */
 export interface ObjectWithQuotedKeys extends JsonObject {
   [QUOTED_KEY_MARKER]?: Set<string>

@@ -226,6 +226,10 @@ function mergeObjects(
 
 // #endregion
 
+// #region Type Guards
+
 function canMerge(a: JsonValue, b: JsonValue): a is JsonObject {
   return isJsonObject(a) && isJsonObject(b)
 }
+
+// #endregion

@@ -305,11 +305,11 @@ export function parseKeyToken(content: string, start: number): { key: string, end: number } {
 
 // #region Array content detection helpers
 
-export function isArrayHeaderAfterHyphen(content: string): boolean {
+export function isArrayHeaderContent(content: string): boolean {
   return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1
 }
 
-export function isObjectFirstFieldAfterHyphen(content: string): boolean {
+export function isKeyValueContent(content: string): boolean {
   return findUnquotedChar(content, COLON) !== -1
 }
 
@@ -1,68 +1,34 @@
 import type { BlankLineInfo, Depth, ParsedLine } from '../types'
 import { SPACE, TAB } from '../constants'
 
-export interface ScanResult {
-  lines: ParsedLine[]
-  blankLines: BlankLineInfo[]
-}
-
-export class LineCursor {
-  private lines: ParsedLine[]
-  private index: number
-  private blankLines: BlankLineInfo[]
-
-  constructor(lines: ParsedLine[], blankLines: BlankLineInfo[] = []) {
-    this.lines = lines
-    this.index = 0
-    this.blankLines = blankLines
-  }
-
-  getBlankLines(): BlankLineInfo[] {
-    return this.blankLines
-  }
-
-  peek(): ParsedLine | undefined {
-    return this.lines[this.index]
-  }
-
-  next(): ParsedLine | undefined {
-    return this.lines[this.index++]
-  }
-
-  current(): ParsedLine | undefined {
-    return this.index > 0 ? this.lines[this.index - 1] : undefined
-  }
-
-  advance(): void {
-    this.index++
-  }
-
-  atEnd(): boolean {
-    return this.index >= this.lines.length
-  }
-
-  get length(): number {
-    return this.lines.length
-  }
-
-  peekAtDepth(targetDepth: Depth): ParsedLine | undefined {
-    const line = this.peek()
-    return line?.depth === targetDepth ? line : undefined
-  }
-}
-
-export function toParsedLines(source: string, indentSize: number, strict: boolean): ScanResult {
-  if (!source.trim()) {
-    return { lines: [], blankLines: [] }
-  }
-
-  const lines = source.split('\n')
-  const parsed: ParsedLine[] = []
-  const blankLines: BlankLineInfo[] = []
-
-  for (let i = 0; i < lines.length; i++) {
-    const raw = lines[i]!
-    const lineNumber = i + 1
+// #region Scan state
+
+export interface StreamingScanState {
+  lineNumber: number
+  blankLines: BlankLineInfo[]
+}
+
+export function createScanState(): StreamingScanState {
+  return {
+    lineNumber: 0,
+    blankLines: [],
+  }
+}
+
+// #endregion
+
+// #region Line parsing
+
+export function parseLineIncremental(
+  raw: string,
+  state: StreamingScanState,
+  indentSize: number,
+  strict: boolean,
+): ParsedLine | undefined {
+  state.lineNumber++
+  const lineNumber = state.lineNumber
+
+  // Count leading spaces
   let indent = 0
   while (indent < raw.length && raw[indent] === SPACE) {
     indent++

@@ -73,8 +39,8 @@ export function toParsedLines(source: string, indentSize: number, strict: boolean
   // Track blank lines
   if (!content.trim()) {
     const depth = computeDepthFromIndent(indent, indentSize)
-    blankLines.push({ lineNumber, indent, depth })
-    continue
+    state.blankLines.push({ lineNumber, indent, depth })
+    return undefined
   }
 
   const depth = computeDepthFromIndent(indent, indentSize)

@@ -83,7 +49,10 @@ export function toParsedLines(source: string, indentSize: number, strict: boolean
   if (strict) {
     // Find the full leading whitespace region (spaces and tabs)
     let whitespaceEndIndex = 0
-    while (whitespaceEndIndex < raw.length && (raw[whitespaceEndIndex] === SPACE || raw[whitespaceEndIndex] === TAB)) {
+    while (
+      whitespaceEndIndex < raw.length
+      && (raw[whitespaceEndIndex] === SPACE || raw[whitespaceEndIndex] === TAB)
+    ) {
       whitespaceEndIndex++
     }
 

@@ -94,16 +63,47 @@ export function toParsedLines(source: string, indentSize: number, strict: boolean
 
     // Check for exact multiples of indentSize
     if (indent > 0 && indent % indentSize !== 0) {
-      throw new SyntaxError(`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
+      throw new SyntaxError(
+        `Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`,
+      )
     }
   }
 
-    parsed.push({ raw, indent, content, depth, lineNumber })
-  }
-
-  return { lines: parsed, blankLines }
+  return { raw, indent, content, depth, lineNumber }
+}
+
+export function* parseLinesSync(
+  source: Iterable<string>,
+  indentSize: number,
+  strict: boolean,
+  state: StreamingScanState,
+): Generator<ParsedLine> {
+  for (const raw of source) {
+    const parsedLine = parseLineIncremental(raw, state, indentSize, strict)
+
+    if (parsedLine !== undefined) {
+      yield parsedLine
+    }
+  }
+}
+
+export async function* parseLinesAsync(
+  source: AsyncIterable<string>,
+  indentSize: number,
+  strict: boolean,
+  state: StreamingScanState,
+): AsyncGenerator<ParsedLine> {
+  for await (const raw of source) {
+    const parsedLine = parseLineIncremental(raw, state, indentSize, strict)
+
+    if (parsedLine !== undefined) {
+      yield parsedLine
+    }
+  }
+}
 
 function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
   return Math.floor(indentSpaces / indentSize)
 }
+
+// #endregion
@@ -1,7 +1,8 @@
|
|||||||
import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
|
import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ParsedLine } from '../types'
|
||||||
import type { LineCursor } from './scanner'
|
|
||||||
import { COLON, LIST_ITEM_PREFIX } from '../constants'
|
import { COLON, LIST_ITEM_PREFIX } from '../constants'
|
||||||
|
|
||||||
|
// #region Count and structure validation
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Asserts that the actual count matches the expected count in strict mode.
|
* Asserts that the actual count matches the expected count in strict mode.
|
||||||
*/
|
*/
|
||||||
@@ -9,7 +10,7 @@ export function assertExpectedCount(
|
|||||||
actual: number,
|
actual: number,
|
||||||
expected: number,
|
expected: number,
|
||||||
itemType: string,
|
itemType: string,
|
||||||
options: ResolvedDecodeOptions,
|
options: { strict: boolean },
|
||||||
): void {
|
): void {
|
||||||
if (options.strict && actual !== expected) {
|
if (options.strict && actual !== expected) {
|
||||||
throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`)
|
throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`)
|
||||||
@@ -20,11 +21,10 @@ export function assertExpectedCount(
|
|||||||
* Validates that there are no extra list items beyond the expected count.
|
* Validates that there are no extra list items beyond the expected count.
|
||||||
*/
|
*/
|
||||||
export function validateNoExtraListItems(
|
export function validateNoExtraListItems(
|
||||||
cursor: LineCursor,
|
nextLine: ParsedLine | undefined,
|
||||||
itemDepth: Depth,
|
itemDepth: Depth,
|
||||||
expectedCount: number,
|
expectedCount: number,
|
||||||
): void {
|
): void {
|
||||||
const nextLine = cursor.peek()
|
|
||||||
if (nextLine?.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
|
if (nextLine?.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||||
throw new RangeError(`Expected ${expectedCount} list array items, but found more`)
|
throw new RangeError(`Expected ${expectedCount} list array items, but found more`)
|
||||||
}
|
}
|
||||||
@@ -34,11 +34,10 @@ export function validateNoExtraListItems(
|
|||||||
* Validates that there are no extra tabular rows beyond the expected count.
|
* Validates that there are no extra tabular rows beyond the expected count.
|
||||||
*/
|
*/
|
||||||
export function validateNoExtraTabularRows(
|
export function validateNoExtraTabularRows(
|
||||||
cursor: LineCursor,
|
nextLine: ParsedLine | undefined,
|
||||||
rowDepth: Depth,
|
rowDepth: Depth,
|
||||||
header: ArrayHeaderInfo,
|
header: ArrayHeaderInfo,
|
||||||
): void {
|
): void {
|
||||||
const nextLine = cursor.peek()
|
|
||||||
if (
|
if (
|
||||||
nextLine?.depth === rowDepth
|
nextLine?.depth === rowDepth
|
||||||
&& !nextLine.content.startsWith(LIST_ITEM_PREFIX)
|
&& !nextLine.content.startsWith(LIST_ITEM_PREFIX)
|
||||||
@@ -62,8 +61,6 @@ export function validateNoBlankLinesInRange(
|
|||||||
return
|
return
|
||||||
|
|
||||||
// Find blank lines within the range
|
// Find blank lines within the range
|
||||||
// Note: We don't filter by depth because ANY blank line between array items is an error,
|
|
||||||
// regardless of its indentation level
|
|
||||||
const firstBlank = blankLines.find(
|
const firstBlank = blankLines.find(
|
||||||
blank => blank.lineNumber > startLine && blank.lineNumber < endLine,
|
blank => blank.lineNumber > startLine && blank.lineNumber < endLine,
|
||||||
)
|
)
|
||||||
@@ -75,6 +72,10 @@ export function validateNoBlankLinesInRange(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// #endregion
|
||||||
|
|
||||||
|
// #region Row classification helpers
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if a line is a data row (vs a key-value pair) in a tabular array.
|
* Checks if a line is a data row (vs a key-value pair) in a tabular array.
|
||||||
*/
|
*/
|
||||||
@@ -95,3 +96,5 @@ function isDataRow(content: string, delimiter: Delimiter): boolean {
|
|||||||
// Colon before delimiter or no delimiter = key-value pair
|
// Colon before delimiter or no delimiter = key-value pair
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// #endregion
|
||||||
|
|||||||
@@ -1,55 +1,27 @@
-import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
+import type { DecodeOptions, DecodeStreamOptions, EncodeOptions, JsonStreamEvent, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
 import { DEFAULT_DELIMITER } from './constants'
-import { decodeValueFromLines } from './decode/decoders'
+import { decodeStream as decodeStreamCore, decodeStreamSync as decodeStreamSyncCore } from './decode/decoders'
+import { buildValueFromEvents } from './decode/event-builder'
 import { expandPathsSafe } from './decode/expand'
-import { LineCursor, toParsedLines } from './decode/scanner'
 import { encodeJsonValue } from './encode/encoders'
 import { normalizeValue } from './encode/normalize'
 
 export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
 export type {
   DecodeOptions,
+  DecodeStreamOptions,
   Delimiter,
   DelimiterKey,
   EncodeOptions,
   JsonArray,
   JsonObject,
   JsonPrimitive,
+  JsonStreamEvent,
   JsonValue,
   ResolvedDecodeOptions,
   ResolvedEncodeOptions,
 } from './types'
 
-/**
- * Encodes a JavaScript value into TOON format as a sequence of lines.
- *
- * This function yields TOON lines one at a time without building the full string,
- * making it suitable for streaming large outputs to files, HTTP responses, or process stdout.
- *
- * @param input - Any JavaScript value (objects, arrays, primitives)
- * @param options - Optional encoding configuration
- * @returns Iterable of TOON lines (without trailing newlines)
- *
- * @example
- * ```ts
- * // Stream to stdout
- * for (const line of encodeLines({ name: 'Alice', age: 30 })) {
- *   console.log(line)
- * }
- *
- * // Collect to array
- * const lines = Array.from(encodeLines(data))
- *
- * // Equivalent to encode()
- * const toonString = Array.from(encodeLines(data, options)).join('\n')
- * ```
- */
-export function encodeLines(input: unknown, options?: EncodeOptions): Iterable<string> {
-  const normalizedValue = normalizeValue(input)
-  const resolvedOptions = resolveOptions(options)
-  return encodeJsonValue(normalizedValue, resolvedOptions, 0)
-}
-
 /**
  * Encodes a JavaScript value into TOON format string.
  *
@@ -94,15 +66,69 @@ export function encode(input: unknown, options?: EncodeOptions): string {
  * ```
  */
 export function decode(input: string, options?: DecodeOptions): JsonValue {
-  const resolvedOptions = resolveDecodeOptions(options)
-  const scanResult = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
-
-  if (scanResult.lines.length === 0) {
-    return {}
-  }
-
-  const cursor = new LineCursor(scanResult.lines, scanResult.blankLines)
-  const decodedValue = decodeValueFromLines(cursor, resolvedOptions)
+  const lines = input.split('\n')
+  return decodeFromLines(lines, options)
+}
+
+/**
+ * Encodes a JavaScript value into TOON format as a sequence of lines.
+ *
+ * This function yields TOON lines one at a time without building the full string,
+ * making it suitable for streaming large outputs to files, HTTP responses, or process stdout.
+ *
+ * @param input - Any JavaScript value (objects, arrays, primitives)
+ * @param options - Optional encoding configuration
+ * @returns Iterable of TOON lines (without trailing newlines)
+ *
+ * @example
+ * ```ts
+ * // Stream to stdout
+ * for (const line of encodeLines({ name: 'Alice', age: 30 })) {
+ *   console.log(line)
+ * }
+ *
+ * // Collect to array
+ * const lines = Array.from(encodeLines(data))
+ *
+ * // Equivalent to encode()
+ * const toonString = Array.from(encodeLines(data, options)).join('\n')
+ * ```
+ */
+export function encodeLines(input: unknown, options?: EncodeOptions): Iterable<string> {
+  const normalizedValue = normalizeValue(input)
+  const resolvedOptions = resolveOptions(options)
+  return encodeJsonValue(normalizedValue, resolvedOptions, 0)
+}
+
+/**
+ * Decodes TOON format from pre-split lines into a JavaScript value.
+ *
+ * This is a convenience wrapper around the streaming decoder that builds
+ * the full value in memory. Useful when you already have lines as an array
+ * or iterable and want the standard decode behavior with path expansion support.
+ *
+ * @param lines - Iterable of TOON lines (without newlines)
+ * @param options - Optional decoding configuration (supports expandPaths)
+ * @returns Parsed JavaScript value (object, array, or primitive)
+ *
+ * @example
+ * ```ts
+ * const lines = ['name: Alice', 'age: 30']
+ * decodeFromLines(lines)
+ * // { name: 'Alice', age: 30 }
+ * ```
+ */
+export function decodeFromLines(lines: Iterable<string>, options?: DecodeOptions): JsonValue {
+  const resolvedOptions = resolveDecodeOptions(options)
+
+  // Use streaming decoder without expandPaths
+  const streamOptions: DecodeStreamOptions = {
+    indent: resolvedOptions.indent,
+    strict: resolvedOptions.strict,
+  }
+
+  const events = decodeStreamSyncCore(lines, streamOptions)
+  const decodedValue = buildValueFromEvents(events)
 
   // Apply path expansion if enabled
   if (resolvedOptions.expandPaths === 'safe') {
@@ -112,6 +138,72 @@ export function decode(input: string, options?: DecodeOptions): JsonValue {
   return decodedValue
 }
 
+/**
+ * Synchronously decodes TOON lines into a stream of JSON events.
+ *
+ * This function yields structured events (startObject, endObject, startArray, endArray,
+ * key, primitive) that represent the JSON data model without building the full value tree.
+ * Useful for streaming processing, custom transformations, or memory-efficient parsing.
+ *
+ * @remarks
+ * Path expansion (`expandPaths: 'safe'`) is not supported in streaming mode.
+ *
+ * @param lines - Iterable of TOON lines (without newlines)
+ * @param options - Optional decoding configuration (expandPaths not supported)
+ * @returns Iterable of JSON stream events
+ *
+ * @example
+ * ```ts
+ * const lines = ['name: Alice', 'age: 30']
+ * for (const event of decodeStreamSync(lines)) {
+ *   console.log(event)
+ *   // { type: 'startObject' }
+ *   // { type: 'key', key: 'name' }
+ *   // { type: 'primitive', value: 'Alice' }
+ *   // ...
+ * }
+ * ```
+ */
+export function decodeStreamSync(lines: Iterable<string>, options?: DecodeStreamOptions): Iterable<JsonStreamEvent> {
+  return decodeStreamSyncCore(lines, options)
+}
+
+/**
+ * Asynchronously decodes TOON lines into a stream of JSON events.
+ *
+ * This function yields structured events (startObject, endObject, startArray, endArray,
+ * key, primitive) that represent the JSON data model without building the full value tree.
+ * Supports both sync and async iterables for maximum flexibility with file streams,
+ * network responses, or other async sources.
+ *
+ * @remarks
+ * Path expansion (`expandPaths: 'safe'`) is not supported in streaming mode.
+ *
+ * @param source - Async or sync iterable of TOON lines (without newlines)
+ * @param options - Optional decoding configuration (expandPaths not supported)
+ * @returns Async iterable of JSON stream events
+ *
+ * @example
+ * ```ts
+ * const fileStream = createReadStream('data.toon', 'utf-8')
+ * const lines = splitLines(fileStream) // Async iterable of lines
+ *
+ * for await (const event of decodeStream(lines)) {
+ *   console.log(event)
+ *   // { type: 'startObject' }
+ *   // { type: 'key', key: 'name' }
+ *   // { type: 'primitive', value: 'Alice' }
+ *   // ...
+ * }
+ * ```
+ */
+export function decodeStream(
+  source: AsyncIterable<string> | Iterable<string>,
+  options?: DecodeStreamOptions,
+): AsyncIterable<JsonStreamEvent> {
+  return decodeStreamCore(source, options)
+}
+
 function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
   return {
     indent: options?.indent ?? 2,
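The `decodeStream` example above refers to a `splitLines` helper that is not part of this change. A minimal sketch of such a chunk-to-line adapter is shown below; the `linesFromChunks` name and implementation are illustrative only, not part of the package.

```ts
// Hypothetical helper (not part of this diff): turn an async stream of text
// chunks into an async stream of lines, suitable as a source for decodeStream.
async function* linesFromChunks(chunks: AsyncIterable<string>): AsyncIterable<string> {
  let buffer = ''
  for await (const chunk of chunks) {
    buffer += chunk
    const parts = buffer.split('\n')
    buffer = parts.pop() ?? '' // keep the trailing partial line buffered
    yield* parts
  }
  if (buffer.length > 0)
    yield buffer // flush the last line if the input did not end with a newline
}

// Usage sketch: for await (const event of decodeStream(linesFromChunks(chunks))) { ... }
```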
@@ -69,6 +69,32 @@ export interface DecodeOptions {
 
 export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>
 
+/**
+ * Options for streaming decode operations.
+ *
+ * @remarks
+ * Path expansion is not supported in streaming mode.
+ */
+export interface DecodeStreamOptions extends Omit<DecodeOptions, 'expandPaths'> {
+  /**
+   * Path expansion is not supported in streaming decode.
+   * This option is explicitly omitted.
+   */
+  expandPaths?: never
+}
+
+// #endregion
+
+// #region Streaming decoder types
+
+export type JsonStreamEvent
+  = | { type: 'startObject' }
+    | { type: 'endObject' }
+    | { type: 'startArray', length: number }
+    | { type: 'endArray' }
+    | { type: 'key', key: string, wasQuoted?: boolean }
+    | { type: 'primitive', value: JsonPrimitive }
+
 // #endregion
 
 // #region Decoder parsing types
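The `JsonStreamEvent` union above is what `decodeStreamSync` and `decodeStream` yield. As one possible consumer, the sketch below walks the events and compares each array's declared `length` against the number of items actually emitted, without materializing any values. The `checkArrayLengths` helper is illustrative only and not part of the package; the library's own strict-mode validation is separate.

```ts
import { decodeStreamSync } from '@toon-format/toon'

// Illustrative consumer (not part of the package): compare each array's
// declared length against the number of items actually emitted.
function checkArrayLengths(lines: Iterable<string>): boolean {
  // One frame per open container; array frames carry their declared length.
  // Object frames also count their values, but only array frames are checked.
  const stack: Array<{ declared?: number, items: number }> = []
  let ok = true

  const countItem = (): void => {
    const top = stack[stack.length - 1]
    if (top)
      top.items++
  }

  for (const event of decodeStreamSync(lines)) {
    switch (event.type) {
      case 'key':
        break // keys label the next value; they are not items themselves
      case 'primitive':
        countItem()
        break
      case 'startObject':
        countItem() // an object nested in an array counts as one item
        stack.push({ items: 0 })
        break
      case 'startArray':
        countItem()
        stack.push({ declared: event.length, items: 0 })
        break
      case 'endObject':
        stack.pop()
        break
      case 'endArray': {
        const frame = stack.pop()
        if (frame?.declared !== undefined && frame.declared !== frame.items)
          ok = false
        break
      }
    }
  }

  return ok
}
```

For the tabular case in the tests below (`users[2]{name,age}:` with two rows) this returns `true`; an array header that declares more rows than actually follow would return `false`.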
packages/toon/test/decodeStream.test.ts (new file, 343 lines)
@@ -0,0 +1,343 @@
import { describe, expect, it } from 'vitest'
import { buildValueFromEvents } from '../src/decode/event-builder'
import { decode, decodeFromLines, decodeStreamSync } from '../src/index'

describe('streaming decode', () => {
  describe('decodeStreamSync', () => {
    it('should decode simple object', () => {
      const input = 'name: Alice\nage: 30'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))

      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Alice' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 30 },
        { type: 'endObject' },
      ])
    })

    it('should decode nested object', () => {
      const input = 'user:\n  name: Alice\n  age: 30'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))

      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'user' },
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Alice' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 30 },
        { type: 'endObject' },
        { type: 'endObject' },
      ])
    })

    it('should decode inline primitive array', () => {
      const input = 'scores[3]: 95, 87, 92'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))

      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'scores' },
        { type: 'startArray', length: 3 },
        { type: 'primitive', value: 95 },
        { type: 'primitive', value: 87 },
        { type: 'primitive', value: 92 },
        { type: 'endArray' },
        { type: 'endObject' },
      ])
    })

    it('should decode list array', () => {
      const input = 'items[2]:\n  - Apple\n  - Banana'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))

      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'items' },
        { type: 'startArray', length: 2 },
        { type: 'primitive', value: 'Apple' },
        { type: 'primitive', value: 'Banana' },
        { type: 'endArray' },
        { type: 'endObject' },
      ])
    })

    it('should decode tabular array', () => {
      const input = 'users[2]{name,age}:\n  Alice, 30\n  Bob, 25'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))

      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'key', key: 'users' },
        { type: 'startArray', length: 2 },
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Alice' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 30 },
        { type: 'endObject' },
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Bob' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 25 },
        { type: 'endObject' },
        { type: 'endArray' },
        { type: 'endObject' },
      ])
    })

    it('should decode root primitive', () => {
      const input = 'Hello World'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))

      expect(events).toEqual([
        { type: 'primitive', value: 'Hello World' },
      ])
    })

    it('should decode root array', () => {
      const input = '[2]:\n  - Apple\n  - Banana'
      const lines = input.split('\n')
      const events = Array.from(decodeStreamSync(lines))

      expect(events).toEqual([
        { type: 'startArray', length: 2 },
        { type: 'primitive', value: 'Apple' },
        { type: 'primitive', value: 'Banana' },
        { type: 'endArray' },
      ])
    })

    it('should decode empty input as empty object', () => {
      const lines: string[] = []
      const events = Array.from(decodeStreamSync(lines))

      expect(events).toEqual([
        { type: 'startObject' },
        { type: 'endObject' },
      ])
    })

    it('should throw on expandPaths option', () => {
      const input = 'name: Alice'
      const lines = input.split('\n')

      expect(() => Array.from(decodeStreamSync(lines, { expandPaths: 'safe' } as any)))
        .toThrow('expandPaths is not supported in streaming decode')
    })

    it('should enforce strict mode validation', () => {
      const input = 'items[2]:\n  - Apple'
      const lines = input.split('\n')

      expect(() => Array.from(decodeStreamSync(lines, { strict: true })))
        .toThrow()
    })

    it('should allow count mismatch in non-strict mode', () => {
      const input = 'items[2]:\n  - Apple'
      const lines = input.split('\n')

      // Should not throw in non-strict mode
      const events = Array.from(decodeStreamSync(lines, { strict: false }))

      expect(events).toBeDefined()
      expect(events[0]).toEqual({ type: 'startObject' })
    })
  })

  describe('buildValueFromEvents', () => {
    it('should build object from events', () => {
      const events = [
        { type: 'startObject' as const },
        { type: 'key' as const, key: 'name' },
        { type: 'primitive' as const, value: 'Alice' },
        { type: 'key' as const, key: 'age' },
        { type: 'primitive' as const, value: 30 },
        { type: 'endObject' as const },
      ]

      const result = buildValueFromEvents(events)

      expect(result).toEqual({ name: 'Alice', age: 30 })
    })

    it('should build nested object from events', () => {
      const events = [
        { type: 'startObject' as const },
        { type: 'key' as const, key: 'user' },
        { type: 'startObject' as const },
        { type: 'key' as const, key: 'name' },
        { type: 'primitive' as const, value: 'Alice' },
        { type: 'endObject' as const },
        { type: 'endObject' as const },
      ]

      const result = buildValueFromEvents(events)

      expect(result).toEqual({ user: { name: 'Alice' } })
    })

    it('should build array from events', () => {
      const events = [
        { type: 'startArray' as const, length: 3 },
        { type: 'primitive' as const, value: 1 },
        { type: 'primitive' as const, value: 2 },
        { type: 'primitive' as const, value: 3 },
        { type: 'endArray' as const },
      ]

      const result = buildValueFromEvents(events)

      expect(result).toEqual([1, 2, 3])
    })

    it('should build primitive from events', () => {
      const events = [
        { type: 'primitive' as const, value: 'Hello' },
      ]

      const result = buildValueFromEvents(events)

      expect(result).toEqual('Hello')
    })

    it('should throw on incomplete event stream', () => {
      const events = [
        { type: 'startObject' as const },
        { type: 'key' as const, key: 'name' },
        // Missing primitive and endObject
      ]

      expect(() => buildValueFromEvents(events))
        .toThrow('Incomplete event stream')
    })
  })

  describe('decodeFromLines', () => {
    it('should produce same result as decode', () => {
      const input = 'name: Alice\nage: 30\nscores[3]: 95, 87, 92'
      const lines = input.split('\n')

      const fromLines = decodeFromLines(lines)
      const fromString = decode(input)

      expect(fromLines).toEqual(fromString)
    })

    it('should support expandPaths option', () => {
      const input = 'user.name: Alice\nuser.age: 30'
      const lines = input.split('\n')

      const result = decodeFromLines(lines, { expandPaths: 'safe' })

      expect(result).toEqual({
        user: {
          name: 'Alice',
          age: 30,
        },
      })
    })

    it('should handle complex nested structures', () => {
      const input = [
        'users[2]:',
        '  - name: Alice',
        '    scores[3]: 95, 87, 92',
        '  - name: Bob',
        '    scores[3]: 88, 91, 85',
      ].join('\n')

      const fromLines = decodeFromLines(input.split('\n'))
      const fromString = decode(input)

      expect(fromLines).toEqual(fromString)
      expect(fromLines).toEqual({
        users: [
          { name: 'Alice', scores: [95, 87, 92] },
          { name: 'Bob', scores: [88, 91, 85] },
        ],
      })
    })

    it('should handle tabular arrays', () => {
      const input = [
        'users[3]{name,age,city}:',
        '  Alice, 30, NYC',
        '  Bob, 25, LA',
        '  Charlie, 35, SF',
      ].join('\n')

      const fromLines = decodeFromLines(input.split('\n'))
      const fromString = decode(input)

      expect(fromLines).toEqual(fromString)
      expect(fromLines).toEqual({
        users: [
          { name: 'Alice', age: 30, city: 'NYC' },
          { name: 'Bob', age: 25, city: 'LA' },
          { name: 'Charlie', age: 35, city: 'SF' },
        ],
      })
    })
  })

  describe('streaming equivalence', () => {
    // Test that streaming produces same results as non-streaming for various inputs
    const testCases = [
      {
        name: 'simple object',
        input: 'name: Alice\nage: 30',
      },
      {
        name: 'nested objects',
        input: 'user:\n  profile:\n    name: Alice\n    age: 30',
      },
      {
        name: 'mixed structures',
        input: 'name: Alice\nscores[3]: 95, 87, 92\naddress:\n  city: NYC\n  zip: 10001',
      },
      {
        name: 'list array with objects',
        input: 'users[2]:\n  - name: Alice\n    age: 30\n  - name: Bob\n    age: 25',
      },
      {
        name: 'root primitive number',
        input: '42',
      },
      {
        name: 'root primitive string',
        input: 'Hello World',
      },
      {
        name: 'root primitive boolean',
        input: 'true',
      },
      {
        name: 'root primitive null',
        input: 'null',
      },
    ]

    for (const testCase of testCases) {
      it(`should match decode() for: ${testCase.name}`, () => {
        const lines = testCase.input.split('\n')
        const streamResult = decodeFromLines(lines)
        const regularResult = decode(testCase.input)

        expect(streamResult).toEqual(regularResult)
      })
    }
  })
})