feat: opt-in key folding and path expansion (closes #86)

This commit is contained in:
Johann Schopplich
2025-11-10 09:56:09 +01:00
parent e1f5d1313d
commit eefb0242e2
14 changed files with 647 additions and 12 deletions

View File

@@ -4,7 +4,7 @@
[![CI](https://github.com/toon-format/toon/actions/workflows/ci.yml/badge.svg)](https://github.com/toon-format/toon/actions) [![CI](https://github.com/toon-format/toon/actions/workflows/ci.yml/badge.svg)](https://github.com/toon-format/toon/actions)
[![npm version](https://img.shields.io/npm/v/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon) [![npm version](https://img.shields.io/npm/v/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon)
[![SPEC v1.4](https://img.shields.io/badge/spec-v1.4-lightgray)](https://github.com/toon-format/spec) [![SPEC v1.5](https://img.shields.io/badge/spec-v1.5-lightgray)](https://github.com/toon-format/spec)
[![npm downloads (total)](https://img.shields.io/npm/dt/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon) [![npm downloads (total)](https://img.shields.io/npm/dt/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon)
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE)
@@ -80,6 +80,7 @@ See [benchmarks](#benchmarks) for concrete comparisons across different data str
- 🍱 **Minimal syntax:** removes redundant punctuation (braces, brackets, most quotes) - 🍱 **Minimal syntax:** removes redundant punctuation (braces, brackets, most quotes)
- 📐 **Indentation-based structure:** like YAML, uses whitespace instead of braces - 📐 **Indentation-based structure:** like YAML, uses whitespace instead of braces
- 🧺 **Tabular arrays:** declare keys once, stream data as rows - 🧺 **Tabular arrays:** declare keys once, stream data as rows
- 🔗 **Optional key folding (v1.5):** collapses single-key wrapper chains into dotted paths (e.g., `data.metadata.items`) to reduce indentation and tokens
[^1]: For flat tabular data, CSV is more compact. TOON adds minimal overhead to provide explicit structure and validation that improves LLM reliability. [^1]: For flat tabular data, CSV is more compact. TOON adds minimal overhead to provide explicit structure and validation that improves LLM reliability.
@@ -736,6 +737,9 @@ cat data.toon | npx @toon-format/cli --decode
| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) | | `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
| `--stats` | Show token count estimates and savings (encode only) | | `--stats` | Show token count estimates and savings (encode only) |
| `--no-strict` | Disable strict validation when decoding | | `--no-strict` | Disable strict validation when decoding |
| `--key-folding <mode>` | Key folding mode: `off`, `safe` (default: `off`) - collapses nested chains (v1.5) |
| `--flatten-depth <number>` | Maximum segments to fold (default: `Infinity`) - requires `--key-folding safe` (v1.5) |
| `--expand-paths <mode>` | Path expansion mode: `off`, `safe` (default: `off`) - reconstructs dotted keys (v1.5) |
### Examples ### Examples
@@ -752,6 +756,9 @@ npx @toon-format/cli data.json --delimiter "|" --length-marker -o output.toon
# Lenient decoding (skip validation) # Lenient decoding (skip validation)
npx @toon-format/cli data.toon --no-strict -o output.json npx @toon-format/cli data.toon --no-strict -o output.json
# Key folding for nested data (v1.5)
npx @toon-format/cli data.json --key-folding safe -o output.toon
# Stdin workflows # Stdin workflows
echo '{"name": "Ada", "age": 30}' | npx @toon-format/cli --stats echo '{"name": "Ada", "age": 30}' | npx @toon-format/cli --stats
cat large-dataset.json | npx @toon-format/cli --delimiter "\t" > output.toon cat large-dataset.json | npx @toon-format/cli --delimiter "\t" > output.toon
@@ -797,6 +804,40 @@ user:
name: Ada name: Ada
``` ```
### Key Folding (Optional)
New in v1.5: Optionally collapse single-key wrapper chains into dotted paths to reduce tokens. Enable with `keyFolding: 'safe'`.
Standard nesting:
```
data:
metadata:
items[2]: a,b
```
With key folding:
```
data.metadata.items[2]: a,b
```
Round-trip with path expansion:
```ts
import { decode, encode } from '@toon-format/toon'
const original = { data: { metadata: { items: ['a', 'b'] } } }
const toon = encode(original, { keyFolding: 'safe' })
// → "data.metadata.items[2]: a,b"
const restored = decode(toon, { expandPaths: 'safe' })
// → Matches original structure
```
See §13.4 in the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md#134-key-folding-and-path-expansion) for folding rules and safety guarantees.
### Arrays ### Arrays
> [!TIP] > [!TIP]
@@ -975,6 +1016,8 @@ Converts any JSON-serializable value to TOON format.
- `indent?: number` Number of spaces per indentation level (default: `2`) - `indent?: number` Number of spaces per indentation level (default: `2`)
- `delimiter?: ',' | '\t' | '|'` Delimiter for array values and tabular rows (default: `','`) - `delimiter?: ',' | '\t' | '|'` Delimiter for array values and tabular rows (default: `','`)
- `lengthMarker?: '#' | false` Optional marker to prefix array lengths (default: `false`) - `lengthMarker?: '#' | false` Optional marker to prefix array lengths (default: `false`)
- `keyFolding?: 'off' | 'safe'` Enable key folding to collapse single-key wrapper chains into dotted paths (default: `'off'`). When `'safe'`, only valid identifier segments are folded (v1.5)
- `flattenDepth?: number` Maximum number of segments to fold when `keyFolding` is enabled (default: `Infinity`). Values 0-1 have no practical effect (v1.5)
**Returns:** **Returns:**
@@ -1096,6 +1139,7 @@ Converts a TOON-formatted string back to JavaScript values.
- `options` Optional decoding options: - `options` Optional decoding options:
- `indent?: number` Expected number of spaces per indentation level (default: `2`) - `indent?: number` Expected number of spaces per indentation level (default: `2`)
- `strict?: boolean` Enable strict validation (default: `true`) - `strict?: boolean` Enable strict validation (default: `true`)
- `expandPaths?: 'off' | 'safe'` Enable path expansion to reconstruct dotted keys into nested objects (default: `'off'`). Pairs with `keyFolding: 'safe'` for lossless round-trips (v1.5)
**Returns:** **Returns:**
@@ -1223,7 +1267,7 @@ Task: Return only users with role "user" as TOON. Use the same header. Set [N] t
## Other Implementations ## Other Implementations
> [!NOTE] > [!NOTE]
> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v1.4) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate implementations across any language. > When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v1.5) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations.
### Official Implementations ### Official Implementations

View File

@@ -65,6 +65,9 @@ cat data.toon | toon --decode
| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) | | `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
| `--stats` | Show token count estimates and savings (encode only) | | `--stats` | Show token count estimates and savings (encode only) |
| `--no-strict` | Disable strict validation when decoding | | `--no-strict` | Disable strict validation when decoding |
| `--key-folding <mode>` | Enable key folding: `off`, `safe` (default: `off`) - v1.5 |
| `--flatten-depth <number>` | Maximum folded segment count when key folding is enabled (default: `Infinity`) - v1.5 |
| `--expand-paths <mode>` | Enable path expansion: `off`, `safe` (default: `off`) - v1.5 |
## Advanced Examples ## Advanced Examples
@@ -119,12 +122,81 @@ cat large-dataset.json | toon --delimiter "\t" > output.toon
jq '.results' data.json | toon > filtered.toon jq '.results' data.json | toon > filtered.toon
``` ```
### Key Folding (v1.5)
Collapse nested wrapper chains to reduce tokens:
#### Basic key folding
```bash
# Encode with key folding
toon input.json --key-folding safe -o output.toon
```
For data like:
```json
{
"data": {
"metadata": {
"items": ["a", "b"]
}
}
}
```
Output becomes:
```
data.metadata.items[2]: a,b
```
Instead of:
```
data:
metadata:
items[2]: a,b
```
#### Limit folding depth
```bash
# Fold at most 2 key segments into each dotted path
toon input.json --key-folding safe --flatten-depth 2 -o output.toon
```
#### Path expansion on decode
```bash
# Reconstruct nested structure from folded keys
toon data.toon --expand-paths safe -o output.json
```
#### Round-trip workflow
```bash
# Encode with folding
toon input.json --key-folding safe -o compressed.toon
# Decode with expansion (restores original structure)
toon compressed.toon --expand-paths safe -o output.json
# Verify round-trip
diff input.json output.json
```
#### Combined with other options
```bash
# Key folding + tab delimiter + stats
toon data.json --key-folding safe --delimiter "\t" --stats -o output.toon
```
## Why Use the CLI? ## Why Use the CLI?
- **Quick conversions** between formats without writing code - **Quick conversions** between formats without writing code
- **Token analysis** to see potential savings before sending to LLMs - **Token analysis** to see potential savings before sending to LLMs
- **Pipeline integration** with existing JSON-based workflows - **Pipeline integration** with existing JSON-based workflows
- **Flexible formatting** with delimiter and indentation options - **Flexible formatting** with delimiter and indentation options
- **Key folding (v1.5)** to collapse nested wrappers for additional token savings
## Related ## Related

View File

@@ -1,4 +1,4 @@
import type { DecodeOptions, Delimiter, EncodeOptions } from '../../toon/src' import type { DecodeOptions, EncodeOptions } from '../../toon/src'
import type { InputSource } from './types' import type { InputSource } from './types'
import * as fsp from 'node:fs/promises' import * as fsp from 'node:fs/promises'
import * as path from 'node:path' import * as path from 'node:path'
@@ -11,9 +11,11 @@ import { formatInputLabel, readInput } from './utils'
export async function encodeToToon(config: { export async function encodeToToon(config: {
input: InputSource input: InputSource
output?: string output?: string
delimiter: Delimiter indent: NonNullable<EncodeOptions['indent']>
indent: number delimiter: NonNullable<EncodeOptions['delimiter']>
lengthMarker: NonNullable<EncodeOptions['lengthMarker']> lengthMarker: NonNullable<EncodeOptions['lengthMarker']>
keyFolding?: NonNullable<EncodeOptions['keyFolding']>
flattenDepth?: number
printStats: boolean printStats: boolean
}): Promise<void> { }): Promise<void> {
const jsonContent = await readInput(config.input) const jsonContent = await readInput(config.input)
@@ -30,6 +32,8 @@ export async function encodeToToon(config: {
delimiter: config.delimiter, delimiter: config.delimiter,
indent: config.indent, indent: config.indent,
lengthMarker: config.lengthMarker, lengthMarker: config.lengthMarker,
keyFolding: config.keyFolding,
flattenDepth: config.flattenDepth,
} }
const toonOutput = encode(data, encodeOptions) const toonOutput = encode(data, encodeOptions)
@@ -59,8 +63,9 @@ export async function encodeToToon(config: {
export async function decodeToJson(config: { export async function decodeToJson(config: {
input: InputSource input: InputSource
output?: string output?: string
indent: number indent: NonNullable<DecodeOptions['indent']>
strict: boolean strict: NonNullable<DecodeOptions['strict']>
expandPaths?: NonNullable<DecodeOptions['expandPaths']>
}): Promise<void> { }): Promise<void> {
const toonContent = await readInput(config.input) const toonContent = await readInput(config.input)
@@ -69,6 +74,7 @@ export async function decodeToJson(config: {
const decodeOptions: DecodeOptions = { const decodeOptions: DecodeOptions = {
indent: config.indent, indent: config.indent,
strict: config.strict, strict: config.strict,
expandPaths: config.expandPaths,
} }
data = decode(toonContent, decodeOptions) data = decode(toonContent, decodeOptions)
} }

View File

@@ -1,5 +1,5 @@
import type { CommandDef } from 'citty' import type { CommandDef } from 'citty'
import type { Delimiter } from '../../toon/src' import type { DecodeOptions, Delimiter, EncodeOptions } from '../../toon/src'
import type { InputSource } from './types' import type { InputSource } from './types'
import * as path from 'node:path' import * as path from 'node:path'
import process from 'node:process' import process from 'node:process'
@@ -51,6 +51,20 @@ export const mainCommand: CommandDef<{
description: string description: string
default: true default: true
} }
keyFolding: {
type: 'string'
description: string
default: string
}
flattenDepth: {
type: 'string'
description: string
}
expandPaths: {
type: 'string'
description: string
default: string
}
stats: { stats: {
type: 'boolean' type: 'boolean'
description: string description: string
@@ -103,6 +117,20 @@ export const mainCommand: CommandDef<{
description: 'Enable strict mode for decoding', description: 'Enable strict mode for decoding',
default: true, default: true,
}, },
keyFolding: {
type: 'string',
description: 'Enable key folding: off, safe (default: off)',
default: 'off',
},
flattenDepth: {
type: 'string',
description: 'Maximum folded segment count when key folding is enabled (default: Infinity)',
},
expandPaths: {
type: 'string',
description: 'Enable path expansion: off, safe (default: off)',
default: 'off',
},
stats: { stats: {
type: 'boolean', type: 'boolean',
description: 'Show token statistics', description: 'Show token statistics',
@@ -129,6 +157,27 @@ export const mainCommand: CommandDef<{
throw new Error(`Invalid delimiter "${delimiter}". Valid delimiters are: comma (,), tab (\\t), pipe (|)`) throw new Error(`Invalid delimiter "${delimiter}". Valid delimiters are: comma (,), tab (\\t), pipe (|)`)
} }
// Validate `keyFolding`
const keyFolding = args.keyFolding || 'off'
if (keyFolding !== 'off' && keyFolding !== 'safe') {
throw new Error(`Invalid keyFolding value "${keyFolding}". Valid values are: off, safe`)
}
// Parse and validate `flattenDepth`
let flattenDepth: number | undefined
if (args.flattenDepth !== undefined) {
flattenDepth = Number.parseInt(args.flattenDepth, 10)
if (Number.isNaN(flattenDepth) || flattenDepth < 0) {
throw new Error(`Invalid flattenDepth value: ${args.flattenDepth}`)
}
}
// Validate `expandPaths`
const expandPaths = args.expandPaths || 'off'
if (expandPaths !== 'off' && expandPaths !== 'safe') {
throw new Error(`Invalid expandPaths value "${expandPaths}". Valid values are: off, safe`)
}
const mode = detectMode(inputSource, args.encode, args.decode) const mode = detectMode(inputSource, args.encode, args.decode)
try { try {
@@ -140,6 +189,8 @@ export const mainCommand: CommandDef<{
indent, indent,
lengthMarker: args.lengthMarker === true ? '#' : false, lengthMarker: args.lengthMarker === true ? '#' : false,
printStats: args.stats === true, printStats: args.stats === true,
keyFolding: keyFolding as NonNullable<EncodeOptions['keyFolding']>,
flattenDepth,
}) })
} }
else { else {
@@ -148,6 +199,7 @@ export const mainCommand: CommandDef<{
output: outputPath, output: outputPath,
indent, indent,
strict: args.strict !== false, strict: args.strict !== false,
expandPaths: expandPaths as NonNullable<DecodeOptions['expandPaths']>,
}) })
} }
} }

View File

@@ -12,6 +12,7 @@ export const COLON = ':'
export const SPACE = ' ' export const SPACE = ' '
export const PIPE = '|' export const PIPE = '|'
export const HASH = '#' export const HASH = '#'
export const DOT = '.'
// #endregion // #endregion

View File

@@ -0,0 +1,196 @@
import type { JsonObject, JsonValue } from '../types'
import { DOT } from '../constants'
import { isJsonObject } from '../encode/normalize'
import { isIdentifierSegment } from '../shared/validation'
// #region Path expansion (safe)
/**
 * Tells whether two values are candidates for a deep merge.
 *
 * @remarks
 * A merge is only possible when both operands satisfy `isJsonObject`.
 * The first operand is tested first; the second is only inspected when the first passes.
 */
function canMerge(a: JsonValue, b: JsonValue): a is JsonObject {
  if (!isJsonObject(a)) {
    return false
  }

  return isJsonObject(b)
}
/**
 * Expands dotted keys into nested objects in safe mode.
 *
 * @remarks
 * Recursively traverses a decoded value and rebuilds nested objects from keys
 * containing dots (`.`), provided every segment passes `isIdentifierSegment`.
 *
 * Expansion rules:
 * - Arrays are mapped element by element
 * - Keys containing dots are split into segments; if all segments are identifiers,
 *   the (already expanded) value is inserted at that nested path
 * - Any other key is kept as a literal key, with its value still expanded recursively
 * - A literal key may land on an entry that an earlier dotted key already created
 *   (e.g. `a.b` followed by `a`). That case is resolved exactly like a dotted-path
 *   collision instead of silently replacing the earlier entry:
 *   - both values are objects: deep merge
 *   - otherwise, `strict=true`: throws TypeError
 *   - otherwise, `strict=false`: last write wins
 *
 * @param value - The decoded value to expand
 * @param strict - Whether to throw errors on conflicts
 * @returns The expanded value with dotted keys reconstructed as nested objects
 * @throws TypeError if conflicts occur in strict mode
 */
export function expandPathsSafe(value: JsonValue, strict: boolean): JsonValue {
  if (Array.isArray(value)) {
    // Recursively expand array elements
    return value.map(item => expandPathsSafe(item, strict))
  }

  if (isJsonObject(value)) {
    const result: JsonObject = {}

    for (const key of Object.keys(value)) {
      // The value is expanded in every case, so do it once up front
      const expandedValue = expandPathsSafe(value[key]!, strict)

      if (key.includes(DOT)) {
        const segments = key.split(DOT)

        // Only expand when every segment is a safe identifier
        if (segments.every(seg => isIdentifierSegment(seg))) {
          insertPathSafe(result, segments, expandedValue, strict)
          continue
        }
      }

      // Literal key. Use an own-property test so inherited members such as
      // `toString` are never mistaken for an existing entry.
      if (!Object.prototype.hasOwnProperty.call(result, key)) {
        result[key] = expandedValue
        continue
      }

      // An earlier dotted key already populated this entry
      const existing = result[key]!
      if (canMerge(existing, expandedValue)) {
        // Both are objects - deep merge
        mergeObjects(existing, expandedValue as JsonObject, strict)
      }
      else {
        // Conflict: incompatible types
        if (strict) {
          throw new TypeError(
            `Path expansion conflict at key "${key}": cannot merge ${typeof existing} with ${typeof expandedValue}`,
          )
        }
        // Non-strict: overwrite (LWW)
        result[key] = expandedValue
      }
    }

    return result
  }

  // Primitive value - return as-is
  return value
}
/**
 * Inserts a value at a nested path, creating intermediate objects as needed.
 *
 * @remarks
 * Walks the segment path, creating nested objects where nothing exists yet.
 * When an existing value is encountered:
 * - If both are objects: deep merge (continue insertion)
 * - Otherwise: conflict
 *   - strict=true: throw TypeError
 *   - strict=false: overwrite with new value (last-wins)
 *
 * Only own properties count as "existing", and every write defines an own data
 * property. Reading through the prototype chain would report inherited members
 * (`toString`, `constructor`, ...) as conflicts, and a `__proto__` segment would
 * resolve to `Object.prototype`, which could then be walked into and mutated.
 *
 * @param target - The object to insert into
 * @param segments - Array of path segments (e.g., ['data', 'metadata', 'items'])
 * @param value - The value to insert at the end of the path
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function insertPathSafe(
  target: JsonObject,
  segments: readonly string[],
  value: JsonValue,
  strict: boolean,
): void {
  // Own-property read: inherited members are treated as absent
  const readOwn = (obj: JsonObject, key: string): JsonValue | undefined =>
    Object.prototype.hasOwnProperty.call(obj, key) ? obj[key] : undefined

  // Own-property write: equivalent to plain assignment for ordinary keys, but
  // never triggers the inherited `__proto__` setter
  const writeOwn = (obj: JsonObject, key: string, val: JsonValue): void => {
    Object.defineProperty(obj, key, {
      value: val,
      writable: true,
      enumerable: true,
      configurable: true,
    })
  }

  let current: JsonObject = target

  // Walk to the penultimate segment, creating objects as needed
  for (let i = 0; i < segments.length - 1; i++) {
    const seg = segments[i]!
    const existing = readOwn(current, seg)

    if (existing === undefined) {
      // Create new intermediate object
      const newObj: JsonObject = {}
      writeOwn(current, seg, newObj)
      current = newObj
    }
    else if (isJsonObject(existing)) {
      // Continue into existing object
      current = existing
    }
    else {
      // Conflict: existing value is not an object
      if (strict) {
        throw new TypeError(
          `Path expansion conflict at segment "${seg}": expected object but found ${typeof existing}`,
        )
      }
      // Non-strict: overwrite with new object
      const newObj: JsonObject = {}
      writeOwn(current, seg, newObj)
      current = newObj
    }
  }

  // Insert at the final segment
  const lastSeg = segments[segments.length - 1]!
  const existing = readOwn(current, lastSeg)

  if (existing === undefined) {
    // No conflict - insert directly
    writeOwn(current, lastSeg, value)
  }
  else if (canMerge(existing, value)) {
    // Both are objects - deep merge
    mergeObjects(existing as JsonObject, value as JsonObject, strict)
  }
  else {
    // Conflict: incompatible types
    if (strict) {
      throw new TypeError(
        `Path expansion conflict at key "${lastSeg}": cannot merge ${typeof existing} with ${typeof value}`,
      )
    }
    // Non-strict: overwrite (LWW)
    writeOwn(current, lastSeg, value)
  }
}
/**
 * Deep merges properties from source into target.
 *
 * @remarks
 * For each own enumerable key in source:
 * - If target has no own value for the key: copy it
 * - If both values are objects: recursively merge
 * - Otherwise: conflict (strict throws, non-strict overwrites)
 *
 * Target lookups consider own properties only. Reading `target[key]` directly
 * would pick up inherited members (for example `toString`), so merging a source
 * key with such a name would be reported as a conflict in strict mode.
 *
 * @param target - The target object to merge into (mutated in place)
 * @param source - The source object to merge from
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function mergeObjects(
  target: JsonObject,
  source: JsonObject,
  strict: boolean,
): void {
  // Own-property write: same as assignment for ordinary keys, but never routed
  // through the inherited `__proto__` setter
  const writeOwn = (key: string, val: JsonValue): void => {
    Object.defineProperty(target, key, {
      value: val,
      writable: true,
      enumerable: true,
      configurable: true,
    })
  }

  for (const key of Object.keys(source)) {
    const sourceValue = source[key]!
    const targetValue = Object.prototype.hasOwnProperty.call(target, key)
      ? target[key]
      : undefined

    if (targetValue === undefined) {
      // Key doesn't exist in target - copy it
      writeOwn(key, sourceValue)
    }
    else if (canMerge(targetValue, sourceValue)) {
      // Both are objects - recursively merge
      mergeObjects(targetValue as JsonObject, sourceValue as JsonObject, strict)
    }
    else {
      // Conflict: incompatible types
      if (strict) {
        throw new TypeError(
          `Path expansion conflict at key "${key}": cannot merge ${typeof targetValue} with ${typeof sourceValue}`,
        )
      }
      // Non-strict: overwrite (LWW)
      writeOwn(key, sourceValue)
    }
  }
}
// #endregion

View File

@@ -1,5 +1,6 @@
import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types' import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types'
import { LIST_ITEM_MARKER } from '../constants' import { LIST_ITEM_MARKER } from '../constants'
import { tryFoldKeyChain } from './folding'
import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize' import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize'
import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives' import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives'
import { LineWriter } from './writer' import { LineWriter } from './writer'
@@ -31,11 +32,46 @@ export function encodeObject(value: JsonObject, writer: LineWriter, depth: Depth
const keys = Object.keys(value) const keys = Object.keys(value)
for (const key of keys) { for (const key of keys) {
encodeKeyValuePair(key, value[key]!, writer, depth, options) encodeKeyValuePair(key, value[key]!, writer, depth, options, keys)
} }
} }
export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions): void { export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, siblings?: readonly string[]): void {
// Attempt key folding when enabled
if (options.keyFolding === 'safe' && siblings) {
const foldResult = tryFoldKeyChain(key, value, siblings, options)
if (foldResult) {
const { foldedKey, remainder, leafValue } = foldResult
const encodedFoldedKey = encodeKey(foldedKey)
// Case 1: Fully folded to a leaf value
if (remainder === undefined) {
// The folded chain ended at a leaf (primitive, array, or empty object)
if (isJsonPrimitive(leafValue)) {
writer.push(depth, `${encodedFoldedKey}: ${encodePrimitive(leafValue, options.delimiter)}`)
return
}
else if (isJsonArray(leafValue)) {
encodeArray(foldedKey, leafValue, writer, depth, options)
return
}
else if (isJsonObject(leafValue) && Object.keys(leafValue).length === 0) {
writer.push(depth, `${encodedFoldedKey}:`)
return
}
}
// Case 2: Partially folded with a tail object
if (isJsonObject(remainder)) {
writer.push(depth, `${encodedFoldedKey}:`)
encodeObject(remainder, writer, depth + 1, options)
return
}
}
}
// No folding applied - use standard encoding
const encodedKey = encodeKey(key) const encodedKey = encodeKey(key)
if (isJsonPrimitive(value)) { if (isJsonPrimitive(value)) {

View File

@@ -0,0 +1,173 @@
import type { JsonValue, ResolvedEncodeOptions } from '../types'
import { DOT } from '../constants'
import { isIdentifierSegment } from '../shared/validation'
import { isJsonObject } from './normalize'
// #region Key folding helpers
/**
 * Outcome of a successful key-folding attempt.
 */
export interface FoldResult {
  /**
   * Collected chain segments joined with dots (e.g., "data.metadata.items").
   */
  foldedKey: string
  /**
   * What is left to encode beneath the folded key:
   * - `undefined` when the chain ended at a primitive, an array, or an empty object
   * - the object where traversal stopped when that object is non-empty, whether the
   *   stop was caused by the depth limit or by the object having several keys
   */
  remainder?: JsonValue
  /**
   * The value at which chain traversal stopped.
   * Identical to `remainder` whenever `remainder` is defined; carried separately so
   * the encoder does not have to walk the chain a second time.
   */
  leafValue: JsonValue
}
/**
 * Tries to collapse a chain of single-key objects, starting at `key`, into one dotted key.
 *
 * @remarks
 * Folding is refused (returns `undefined`) when any of the following holds:
 * - `options.keyFolding` is not `'safe'`
 * - `value` is not an object
 * - fewer than two segments could be collected (nothing to fold)
 * - at least one collected segment fails `isIdentifierSegment`
 * - the resulting dotted key equals one of the sibling keys
 *
 * The chain itself is gathered by `collectSingleKeyChain`, bounded by `options.flattenDepth`.
 *
 * @param key - The starting key to fold
 * @param value - The value associated with the key
 * @param siblings - All keys at the same level, used for collision detection
 * @param options - Resolved encoding options
 * @returns A FoldResult when folding applies, otherwise `undefined`
 */
export function tryFoldKeyChain(
  key: string,
  value: JsonValue,
  siblings: readonly string[],
  options: ResolvedEncodeOptions,
): FoldResult | undefined {
  // Folding must be switched on and there must be an object to descend into
  const foldingEnabled = options.keyFolding === 'safe'
  if (!foldingEnabled || !isJsonObject(value)) {
    return undefined
  }

  const chain = collectSingleKeyChain(key, value, options.flattenDepth)

  // A lone segment gains nothing; unsafe segments must not be folded
  const isFoldable = chain.segments.length >= 2
    && chain.segments.every(seg => isIdentifierSegment(seg))
  if (!isFoldable) {
    return undefined
  }

  const foldedKey = buildFoldedKey(chain.segments)

  // Never shadow a literal sibling that already uses the dotted name
  return siblings.includes(foldedKey)
    ? undefined
    : { foldedKey, remainder: chain.tail, leafValue: chain.leafValue }
}
/**
 * Walks down nested single-key objects and records each key along the way.
 *
 * @remarks
 * The walk starts with `startKey` already recorded and descends while:
 * - fewer than `maxDepth` segments have been recorded, and
 * - the current value is an object with exactly one key
 *
 * The value at which the walk stops is reported as `leafValue`. It is also
 * reported as `tail` when it is a non-empty object; for primitives, arrays and
 * empty objects `tail` is `undefined`.
 *
 * @param startKey - The initial key to start the chain
 * @param startValue - The value to traverse
 * @param maxDepth - Maximum number of segments to collect
 * @returns Object containing segments array, tail value, and leaf value
 */
function collectSingleKeyChain(
  startKey: string,
  startValue: JsonValue,
  maxDepth: number,
): { segments: string[], tail: JsonValue | undefined, leafValue: JsonValue } {
  const segments: string[] = [startKey]
  let cursor: JsonValue = startValue

  // Descend through single-key wrappers until the depth budget is used up
  while (segments.length < maxDepth && isJsonObject(cursor)) {
    const childKeys = Object.keys(cursor)
    if (childKeys.length !== 1) {
      break
    }

    const onlyKey = childKeys[0]!
    segments.push(onlyKey)
    cursor = cursor[onlyKey]!
  }

  // Only a non-empty object leaves something to encode beneath the folded key
  const endsInNonEmptyObject = isJsonObject(cursor) && Object.keys(cursor).length > 0

  return {
    segments,
    tail: endsInNonEmptyObject ? cursor : undefined,
    leafValue: cursor,
  }
}
/**
 * Joins chain segments into a single dotted key.
 *
 * @param segments - Key segments in traversal order
 * @returns The segments separated by `DOT`
 */
function buildFoldedKey(segments: readonly string[]): string {
  const dottedPath = segments.join(DOT)
  return dottedPath
}
// #endregion

View File

@@ -1,6 +1,7 @@
import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types' import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
import { DEFAULT_DELIMITER } from './constants' import { DEFAULT_DELIMITER } from './constants'
import { decodeValueFromLines } from './decode/decoders' import { decodeValueFromLines } from './decode/decoders'
import { expandPathsSafe } from './decode/expand'
import { LineCursor, toParsedLines } from './decode/scanner' import { LineCursor, toParsedLines } from './decode/scanner'
import { encodeValue } from './encode/encoders' import { encodeValue } from './encode/encoders'
import { normalizeValue } from './encode/normalize' import { normalizeValue } from './encode/normalize'
@@ -34,7 +35,14 @@ export function decode(input: string, options?: DecodeOptions): JsonValue {
} }
const cursor = new LineCursor(scanResult.lines, scanResult.blankLines) const cursor = new LineCursor(scanResult.lines, scanResult.blankLines)
return decodeValueFromLines(cursor, resolvedOptions) const value = decodeValueFromLines(cursor, resolvedOptions)
// Apply path expansion if enabled
if (resolvedOptions.expandPaths === 'safe') {
return expandPathsSafe(value, resolvedOptions.strict)
}
return value
} }
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions { function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
@@ -42,6 +50,8 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
indent: options?.indent ?? 2, indent: options?.indent ?? 2,
delimiter: options?.delimiter ?? DEFAULT_DELIMITER, delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
lengthMarker: options?.lengthMarker ?? false, lengthMarker: options?.lengthMarker ?? false,
keyFolding: options?.keyFolding ?? 'off',
flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY,
} }
} }
@@ -49,5 +59,6 @@ function resolveDecodeOptions(options?: DecodeOptions): ResolvedDecodeOptions {
return { return {
indent: options?.indent ?? 2, indent: options?.indent ?? 2,
strict: options?.strict ?? true, strict: options?.strict ?? true,
expandPaths: options?.expandPaths ?? 'off',
} }
} }

View File

@@ -12,6 +12,19 @@ export function isValidUnquotedKey(key: string): boolean {
return /^[A-Z_][\w.]*$/i.test(key) return /^[A-Z_][\w.]*$/i.test(key)
} }
/**
 * Tests whether a single key segment is safe to use in key folding and path expansion.
 *
 * @remarks
 * A segment qualifies when it:
 * - starts with an ASCII letter or an underscore, and
 * - continues with ASCII letters, digits, or underscores only
 *
 * Dots are not allowed, and the empty string does not qualify.
 */
export function isIdentifierSegment(key: string): boolean {
  const identifierPattern = /^[A-Za-z_][A-Za-z0-9_]*$/
  return identifierPattern.test(key)
}
/** /**
* Determines if a string value can be safely encoded without quotes. * Determines if a string value can be safely encoded without quotes.
* *

View File

@@ -30,6 +30,20 @@ export interface EncodeOptions {
* @default false * @default false
*/ */
lengthMarker?: '#' | false lengthMarker?: '#' | false
/**
* Enable key folding to collapse single-key wrapper chains.
* When set to 'safe', nested objects with single keys are collapsed into dotted paths
* (e.g., data.metadata.items instead of nested indentation).
* @default 'off'
*/
keyFolding?: 'off' | 'safe'
/**
* Maximum number of segments to fold when keyFolding is enabled.
* Controls how deep the folding can go in single-key chains.
* Values 0 or 1 have no practical effect (treated as effectively disabled).
* @default Infinity
*/
flattenDepth?: number
} }
export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>> export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>>
@@ -49,6 +63,14 @@ export interface DecodeOptions {
* @default true * @default true
*/ */
strict?: boolean strict?: boolean
/**
* Enable path expansion to reconstruct dotted keys into nested objects.
* When set to 'safe', keys containing dots are expanded into nested structures
* if all segments are valid identifiers (e.g., data.metadata.items becomes nested objects).
* Pairs with keyFolding='safe' for lossless round-trips.
* @default 'off'
*/
expandPaths?: 'off' | 'safe'
} }
export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>> export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>

View File

@@ -7,6 +7,7 @@ import delimiters from '@toon-format/spec/tests/fixtures/decode/delimiters.json'
import indentationErrors from '@toon-format/spec/tests/fixtures/decode/indentation-errors.json' import indentationErrors from '@toon-format/spec/tests/fixtures/decode/indentation-errors.json'
import numbers from '@toon-format/spec/tests/fixtures/decode/numbers.json' import numbers from '@toon-format/spec/tests/fixtures/decode/numbers.json'
import objects from '@toon-format/spec/tests/fixtures/decode/objects.json' import objects from '@toon-format/spec/tests/fixtures/decode/objects.json'
import pathExpansion from '@toon-format/spec/tests/fixtures/decode/path-expansion.json'
import primitives from '@toon-format/spec/tests/fixtures/decode/primitives.json' import primitives from '@toon-format/spec/tests/fixtures/decode/primitives.json'
import rootForm from '@toon-format/spec/tests/fixtures/decode/root-form.json' import rootForm from '@toon-format/spec/tests/fixtures/decode/root-form.json'
import validationErrors from '@toon-format/spec/tests/fixtures/decode/validation-errors.json' import validationErrors from '@toon-format/spec/tests/fixtures/decode/validation-errors.json'
@@ -21,6 +22,7 @@ const fixtureFiles = [
arraysPrimitive, arraysPrimitive,
arraysTabular, arraysTabular,
arraysNested, arraysNested,
pathExpansion,
delimiters, delimiters,
whitespace, whitespace,
rootForm, rootForm,

View File

@@ -5,6 +5,7 @@ import arraysObjects from '@toon-format/spec/tests/fixtures/encode/arrays-object
import arraysPrimitive from '@toon-format/spec/tests/fixtures/encode/arrays-primitive.json' import arraysPrimitive from '@toon-format/spec/tests/fixtures/encode/arrays-primitive.json'
import arraysTabular from '@toon-format/spec/tests/fixtures/encode/arrays-tabular.json' import arraysTabular from '@toon-format/spec/tests/fixtures/encode/arrays-tabular.json'
import delimiters from '@toon-format/spec/tests/fixtures/encode/delimiters.json' import delimiters from '@toon-format/spec/tests/fixtures/encode/delimiters.json'
import keyFolding from '@toon-format/spec/tests/fixtures/encode/key-folding.json'
import objects from '@toon-format/spec/tests/fixtures/encode/objects.json' import objects from '@toon-format/spec/tests/fixtures/encode/objects.json'
import options from '@toon-format/spec/tests/fixtures/encode/options.json' import options from '@toon-format/spec/tests/fixtures/encode/options.json'
import primitives from '@toon-format/spec/tests/fixtures/encode/primitives.json' import primitives from '@toon-format/spec/tests/fixtures/encode/primitives.json'
@@ -19,6 +20,7 @@ const fixtureFiles = [
arraysTabular, arraysTabular,
arraysNested, arraysNested,
arraysObjects, arraysObjects,
keyFolding,
delimiters, delimiters,
whitespace, whitespace,
options, options,
@@ -48,5 +50,7 @@ function resolveEncodeOptions(options?: TestCase['options']): ResolvedEncodeOpti
indent: options?.indent ?? 2, indent: options?.indent ?? 2,
delimiter: options?.delimiter ?? DEFAULT_DELIMITER, delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
lengthMarker: options?.lengthMarker === '#' ? '#' : false, lengthMarker: options?.lengthMarker === '#' ? '#' : false,
keyFolding: options?.keyFolding ?? 'off',
flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY,
} }
} }

View File

@@ -13,8 +13,11 @@ export interface TestCase {
options?: { options?: {
delimiter?: ',' | '\t' | '|' delimiter?: ',' | '\t' | '|'
indent?: number indent?: number
lengthMarker?: '#' | '' lengthMarker?: '#'
strict?: boolean strict?: boolean
keyFolding?: 'off' | 'safe'
flattenDepth?: number
expandPaths?: 'off' | 'safe'
} }
specSection?: string specSection?: string
note?: string note?: string