From eefb0242e2f0d88904ffd827bdf6d19fab3f3829 Mon Sep 17 00:00:00 2001 From: Johann Schopplich Date: Mon, 10 Nov 2025 09:56:09 +0100 Subject: [PATCH] feat: opt-in key folding and path expansion (closes #86) --- README.md | 48 +++++- packages/cli/README.md | 72 +++++++++ packages/cli/src/conversion.ts | 16 +- packages/cli/src/index.ts | 54 ++++++- packages/toon/src/constants.ts | 1 + packages/toon/src/decode/expand.ts | 196 +++++++++++++++++++++++++ packages/toon/src/encode/encoders.ts | 40 ++++- packages/toon/src/encode/folding.ts | 173 ++++++++++++++++++++++ packages/toon/src/index.ts | 13 +- packages/toon/src/shared/validation.ts | 13 ++ packages/toon/src/types.ts | 22 +++ packages/toon/test/decode.test.ts | 2 + packages/toon/test/encode.test.ts | 4 + packages/toon/test/types.ts | 5 +- 14 files changed, 647 insertions(+), 12 deletions(-) create mode 100644 packages/toon/src/decode/expand.ts create mode 100644 packages/toon/src/encode/folding.ts diff --git a/README.md b/README.md index ad893b9..4c5e540 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![CI](https://github.com/toon-format/toon/actions/workflows/ci.yml/badge.svg)](https://github.com/toon-format/toon/actions) [![npm version](https://img.shields.io/npm/v/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon) -[![SPEC v1.4](https://img.shields.io/badge/spec-v1.4-lightgray)](https://github.com/toon-format/spec) +[![SPEC v1.5](https://img.shields.io/badge/spec-v1.5-lightgray)](https://github.com/toon-format/spec) [![npm downloads (total)](https://img.shields.io/npm/dt/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) @@ -80,6 +80,7 @@ See [benchmarks](#benchmarks) for concrete comparisons across different data str - ๐Ÿฑ **Minimal syntax:** removes redundant punctuation (braces, brackets, most quotes) - ๐Ÿ“ **Indentation-based structure:** like YAML, uses whitespace instead of 
braces - 🧺 **Tabular arrays:** declare keys once, stream data as rows +- 🔗 **Optional key folding (v1.5):** collapses single-key wrapper chains into dotted paths (e.g., `data.metadata.items`) to reduce indentation and tokens [^1]: For flat tabular data, CSV is more compact. TOON adds minimal overhead to provide explicit structure and validation that improves LLM reliability. @@ -736,6 +737,9 @@ cat data.toon | npx @toon-format/cli --decode | `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) | | `--stats` | Show token count estimates and savings (encode only) | | `--no-strict` | Disable strict validation when decoding | +| `--key-folding <mode>` | Key folding mode: `off`, `safe` (default: `off`) - collapses nested chains (v1.5) | +| `--flatten-depth <number>` | Maximum segments to fold (default: `Infinity`) - requires `--key-folding safe` (v1.5) | +| `--expand-paths <mode>` | Path expansion mode: `off`, `safe` (default: `off`) - reconstructs dotted keys (v1.5) | ### Examples @@ -752,6 +756,9 @@ npx @toon-format/cli data.json --delimiter "|" --length-marker -o output.toon # Lenient decoding (skip validation) npx @toon-format/cli data.toon --no-strict -o output.json +# Key folding for nested data (v1.5) +npx @toon-format/cli data.json --key-folding safe -o output.toon + # Stdin workflows echo '{"name": "Ada", "age": 30}' | npx @toon-format/cli --stats cat large-dataset.json | npx @toon-format/cli --delimiter "\t" > output.toon @@ -797,6 +804,40 @@ user: name: Ada ``` +### Key Folding (Optional) + +New in v1.5: Optionally collapse single-key wrapper chains into dotted paths to reduce tokens. Enable with `keyFolding: 'safe'`.
+ +Standard nesting: + +``` +data: + metadata: + items[2]: a,b +``` + +With key folding: + +``` +data.metadata.items[2]: a,b +``` + +Round-trip with path expansion: + +```ts +import { decode, encode } from '@toon-format/toon' + +const original = { data: { metadata: { items: ['a', 'b'] } } } + +const toon = encode(original, { keyFolding: 'safe' }) +// โ†’ "data.metadata.items[2]: a,b" + +const restored = decode(toon, { expandPaths: 'safe' }) +// โ†’ Matches original structure +``` + +See ยง13.4 in the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md#134-key-folding-and-path-expansion) for folding rules and safety guarantees. + ### Arrays > [!TIP] @@ -975,6 +1016,8 @@ Converts any JSON-serializable value to TOON format. - `indent?: number` โ€“ Number of spaces per indentation level (default: `2`) - `delimiter?: ',' | '\t' | '|'` โ€“ Delimiter for array values and tabular rows (default: `','`) - `lengthMarker?: '#' | false` โ€“ Optional marker to prefix array lengths (default: `false`) + - `keyFolding?: 'off' | 'safe'` โ€“ Enable key folding to collapse single-key wrapper chains into dotted paths (default: `'off'`). When `'safe'`, only valid identifier segments are folded (v1.5) + - `flattenDepth?: number` โ€“ Maximum number of segments to fold when `keyFolding` is enabled (default: `Infinity`). Values 0-1 have no practical effect (v1.5) **Returns:** @@ -1096,6 +1139,7 @@ Converts a TOON-formatted string back to JavaScript values. - `options` โ€“ Optional decoding options: - `indent?: number` โ€“ Expected number of spaces per indentation level (default: `2`) - `strict?: boolean` โ€“ Enable strict validation (default: `true`) + - `expandPaths?: 'off' | 'safe'` โ€“ Enable path expansion to reconstruct dotted keys into nested objects (default: `'off'`). Pairs with `keyFolding: 'safe'` for lossless round-trips (v1.5) **Returns:** @@ -1223,7 +1267,7 @@ Task: Return only users with role "user" as TOON. Use the same header. 
Set [N] t ## Other Implementations > [!NOTE] -> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v1.4) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate implementations across any language. +> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v1.5) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations. ### Official Implementations diff --git a/packages/cli/README.md b/packages/cli/README.md index 6d5faf6..9f97d56 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -65,6 +65,9 @@ cat data.toon | toon --decode | `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) | | `--stats` | Show token count estimates and savings (encode only) | | `--no-strict` | Disable strict validation when decoding | +| `--key-folding ` | Enable key folding: `off`, `safe` (default: `off`) - v1.5 | +| `--flatten-depth ` | Maximum folded segment count when key folding is enabled (default: `Infinity`) - v1.5 | +| `--expand-paths ` | Enable path expansion: `off`, `safe` (default: `off`) - v1.5 | ## Advanced Examples @@ -119,12 +122,81 @@ cat large-dataset.json | toon --delimiter "\t" > output.toon jq '.results' data.json | toon > filtered.toon ``` +### Key Folding (v1.5) + +Collapse nested wrapper chains to reduce tokens: + +#### Basic key folding + +```bash +# Encode with key folding +toon input.json --key-folding safe -o output.toon +``` + +For data like: +```json +{ + "data": { + "metadata": { + "items": ["a", "b"] + } + } +} +``` + +Output becomes: +``` +data.metadata.items[2]: a,b +``` + +Instead of: 
+``` +data: + metadata: + items[2]: a,b +``` + +#### Limit folding depth + +```bash +# Fold maximum 2 levels deep +toon input.json --key-folding safe --flatten-depth 2 -o output.toon +``` + +#### Path expansion on decode + +```bash +# Reconstruct nested structure from folded keys +toon data.toon --expand-paths safe -o output.json +``` + +#### Round-trip workflow + +```bash +# Encode with folding +toon input.json --key-folding safe -o compressed.toon + +# Decode with expansion (restores original structure) +toon compressed.toon --expand-paths safe -o output.json + +# Verify round-trip +diff input.json output.json +``` + +#### Combined with other options + +```bash +# Key folding + tab delimiter + stats +toon data.json --key-folding safe --delimiter "\t" --stats -o output.toon +``` + ## Why Use the CLI? - **Quick conversions** between formats without writing code - **Token analysis** to see potential savings before sending to LLMs - **Pipeline integration** with existing JSON-based workflows - **Flexible formatting** with delimiter and indentation options +- **Key folding (v1.5)** to collapse nested wrappers for additional token savings ## Related diff --git a/packages/cli/src/conversion.ts b/packages/cli/src/conversion.ts index 228ab33..6bdc6f3 100644 --- a/packages/cli/src/conversion.ts +++ b/packages/cli/src/conversion.ts @@ -1,4 +1,4 @@ -import type { DecodeOptions, Delimiter, EncodeOptions } from '../../toon/src' +import type { DecodeOptions, EncodeOptions } from '../../toon/src' import type { InputSource } from './types' import * as fsp from 'node:fs/promises' import * as path from 'node:path' @@ -11,9 +11,11 @@ import { formatInputLabel, readInput } from './utils' export async function encodeToToon(config: { input: InputSource output?: string - delimiter: Delimiter - indent: number + indent: NonNullable + delimiter: NonNullable lengthMarker: NonNullable + keyFolding?: NonNullable + flattenDepth?: number printStats: boolean }): Promise { const jsonContent = 
await readInput(config.input) @@ -30,6 +32,8 @@ export async function encodeToToon(config: { delimiter: config.delimiter, indent: config.indent, lengthMarker: config.lengthMarker, + keyFolding: config.keyFolding, + flattenDepth: config.flattenDepth, } const toonOutput = encode(data, encodeOptions) @@ -59,8 +63,9 @@ export async function encodeToToon(config: { export async function decodeToJson(config: { input: InputSource output?: string - indent: number - strict: boolean + indent: NonNullable + strict: NonNullable + expandPaths?: NonNullable }): Promise { const toonContent = await readInput(config.input) @@ -69,6 +74,7 @@ export async function decodeToJson(config: { const decodeOptions: DecodeOptions = { indent: config.indent, strict: config.strict, + expandPaths: config.expandPaths, } data = decode(toonContent, decodeOptions) } diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 258a62d..c0584cb 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -1,5 +1,5 @@ import type { CommandDef } from 'citty' -import type { Delimiter } from '../../toon/src' +import type { DecodeOptions, Delimiter, EncodeOptions } from '../../toon/src' import type { InputSource } from './types' import * as path from 'node:path' import process from 'node:process' @@ -51,6 +51,20 @@ export const mainCommand: CommandDef<{ description: string default: true } + keyFolding: { + type: 'string' + description: string + default: string + } + flattenDepth: { + type: 'string' + description: string + } + expandPaths: { + type: 'string' + description: string + default: string + } stats: { type: 'boolean' description: string @@ -103,6 +117,20 @@ export const mainCommand: CommandDef<{ description: 'Enable strict mode for decoding', default: true, }, + keyFolding: { + type: 'string', + description: 'Enable key folding: off, safe (default: off)', + default: 'off', + }, + flattenDepth: { + type: 'string', + description: 'Maximum folded segment count when key folding 
is enabled (default: Infinity)', + }, + expandPaths: { + type: 'string', + description: 'Enable path expansion: off, safe (default: off)', + default: 'off', + }, stats: { type: 'boolean', description: 'Show token statistics', @@ -129,6 +157,27 @@ export const mainCommand: CommandDef<{ throw new Error(`Invalid delimiter "${delimiter}". Valid delimiters are: comma (,), tab (\\t), pipe (|)`) } + // Validate `keyFolding` + const keyFolding = args.keyFolding || 'off' + if (keyFolding !== 'off' && keyFolding !== 'safe') { + throw new Error(`Invalid keyFolding value "${keyFolding}". Valid values are: off, safe`) + } + + // Parse and validate `flattenDepth` + let flattenDepth: number | undefined + if (args.flattenDepth !== undefined) { + flattenDepth = Number.parseInt(args.flattenDepth, 10) + if (Number.isNaN(flattenDepth) || flattenDepth < 0) { + throw new Error(`Invalid flattenDepth value: ${args.flattenDepth}`) + } + } + + // Validate `expandPaths` + const expandPaths = args.expandPaths || 'off' + if (expandPaths !== 'off' && expandPaths !== 'safe') { + throw new Error(`Invalid expandPaths value "${expandPaths}". Valid values are: off, safe`) + } + const mode = detectMode(inputSource, args.encode, args.decode) try { @@ -140,6 +189,8 @@ export const mainCommand: CommandDef<{ indent, lengthMarker: args.lengthMarker === true ? '#' : false, printStats: args.stats === true, + keyFolding: keyFolding as NonNullable, + flattenDepth, }) } else { @@ -148,6 +199,7 @@ export const mainCommand: CommandDef<{ output: outputPath, indent, strict: args.strict !== false, + expandPaths: expandPaths as NonNullable, }) } } diff --git a/packages/toon/src/constants.ts b/packages/toon/src/constants.ts index 33ceb36..059dae2 100644 --- a/packages/toon/src/constants.ts +++ b/packages/toon/src/constants.ts @@ -12,6 +12,7 @@ export const COLON = ':' export const SPACE = ' ' export const PIPE = '|' export const HASH = '#' +export const DOT = '.' 
// #region Path expansion (safe)
//
// Uses names that expand.ts imports at the top of the module: `DOT` (../constants),
// `isJsonObject` (../encode/normalize), `isIdentifierSegment` (../shared/validation)
// and the `JsonObject` / `JsonValue` types (../types).

/**
 * Reads `key` from `target`, ignoring anything inherited from the prototype chain.
 *
 * @remarks
 * A plain `target[key]` lookup would return `Object.prototype` for `__proto__` and
 * built-in functions for names such as `toString`, which are all valid identifier
 * segments. Treating those as "existing values" either pollutes the global prototype
 * or reports conflicts that are not really there.
 *
 * @param target - The object to read from
 * @param key - The property name
 * @returns The own property value, or `undefined` if `target` has no such own property
 */
function getOwn(target: JsonObject, key: string): JsonValue | undefined {
  return Object.prototype.hasOwnProperty.call(target, key) ? target[key] : undefined
}

/**
 * Writes `value` to `target[key]` as an own, enumerable data property.
 *
 * @remarks
 * `Object.defineProperty` is used instead of assignment so that the key `__proto__`
 * becomes ordinary data rather than triggering the prototype setter.
 *
 * @param target - The object to write to
 * @param key - The property name
 * @param value - The value to store
 */
function setOwn(target: JsonObject, key: string, value: JsonValue): void {
  Object.defineProperty(target, key, {
    value,
    writable: true,
    enumerable: true,
    configurable: true,
  })
}

/**
 * Stores `value` under `key`, merging with or replacing whatever is already there.
 *
 * @remarks
 * - No existing own value: store directly
 * - Both existing and new values are objects: deep merge into the existing object
 * - Anything else is a conflict: `strict=true` throws, `strict=false` overwrites (LWW)
 *
 * @param target - The object that receives the value
 * @param key - The property name
 * @param value - The value to store or merge
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function assignOrMerge(
  target: JsonObject,
  key: string,
  value: JsonValue,
  strict: boolean,
): void {
  const existing = getOwn(target, key)

  if (existing === undefined) {
    setOwn(target, key, value)
    return
  }

  if (isJsonObject(existing) && isJsonObject(value)) {
    mergeObjects(existing, value, strict)
    return
  }

  if (strict) {
    throw new TypeError(
      `Path expansion conflict at key "${key}": cannot merge ${typeof existing} with ${typeof value}`,
    )
  }

  // Non-strict: overwrite (LWW)
  setOwn(target, key, value)
}

/**
 * Expands dotted keys into nested objects in safe mode.
 *
 * @remarks
 * Recursively walks a decoded TOON value and rebuilds every object from scratch:
 * - Array elements are expanded one by one
 * - A key containing dots is split into segments; if every segment passes
 *   `isIdentifierSegment`, the key becomes a nested path
 * - Any other key (no dots, or at least one non-identifier segment) stays a literal key
 * - Values are expanded recursively before being inserted
 *
 * Literal keys and expanded paths share the same insertion rules, so a literal key that
 * lands on a path created by an earlier dotted key is deep-merged or reported as a
 * conflict instead of silently replacing it:
 * - `strict=true`: throws TypeError on conflicts (non-object collision)
 * - `strict=false`: LWW (silent overwrite)
 *
 * All lookups and writes are restricted to own properties, so segments such as
 * `__proto__` or `toString` are stored as plain data.
 *
 * @param value - The decoded value to expand
 * @param strict - Whether to throw errors on conflicts
 * @returns The expanded value with dotted keys reconstructed as nested objects
 * @throws TypeError if conflicts occur in strict mode
 */
export function expandPathsSafe(value: JsonValue, strict: boolean): JsonValue {
  if (Array.isArray(value)) {
    return value.map(item => expandPathsSafe(item, strict))
  }

  if (!isJsonObject(value)) {
    // Primitive value - return as-is
    return value
  }

  const result: JsonObject = {}

  for (const key of Object.keys(value)) {
    const expandedValue = expandPathsSafe(value[key]!, strict)
    const segments = key.includes(DOT) ? key.split(DOT) : [key]
    const expandable = segments.length > 1 && segments.every(seg => isIdentifierSegment(seg))

    // A non-expandable key is inserted as a one-segment path so that it goes through
    // the same merge/conflict handling as expanded keys.
    insertPathSafe(result, expandable ? segments : [key], expandedValue, strict)
  }

  return result
}

/**
 * Inserts a value at a nested path, creating intermediate objects as needed.
 *
 * @remarks
 * Intermediate segments:
 * - Missing: a new empty object is created
 * - Existing object: insertion continues inside it
 * - Existing non-object: conflict (`strict=true` throws, `strict=false` replaces it
 *   with a new empty object)
 *
 * The final segment is stored with merge-or-conflict semantics (see `assignOrMerge`).
 *
 * @param target - The object to insert into
 * @param segments - Non-empty array of path segments (e.g., ['data', 'metadata', 'items'])
 * @param value - The value to insert at the end of the path
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function insertPathSafe(
  target: JsonObject,
  segments: readonly string[],
  value: JsonValue,
  strict: boolean,
): void {
  let current: JsonObject = target

  // Walk to the penultimate segment, creating objects as needed
  for (let i = 0; i < segments.length - 1; i++) {
    const seg = segments[i]!
    const existing = getOwn(current, seg)

    if (existing !== undefined && isJsonObject(existing)) {
      current = existing
      continue
    }

    if (existing !== undefined && strict) {
      throw new TypeError(
        `Path expansion conflict at segment "${seg}": expected object but found ${typeof existing}`,
      )
    }

    // Missing, or non-strict conflict: (re)create the intermediate object
    const created: JsonObject = {}
    setOwn(current, seg, created)
    current = created
  }

  assignOrMerge(current, segments[segments.length - 1]!, value, strict)
}

/**
 * Deep merges properties from source into target.
 *
 * @remarks
 * Every own enumerable key of `source` is stored into `target` with
 * merge-or-conflict semantics (see `assignOrMerge`). `target` is mutated in place.
 *
 * @param target - The target object to merge into
 * @param source - The source object to merge from
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function mergeObjects(
  target: JsonObject,
  source: JsonObject,
  strict: boolean,
): void {
  for (const key of Object.keys(source)) {
    assignOrMerge(target, key, source[key]!, strict)
  }
}

// #endregion
foldResult = tryFoldKeyChain(key, value, siblings, options) + + if (foldResult) { + const { foldedKey, remainder, leafValue } = foldResult + const encodedFoldedKey = encodeKey(foldedKey) + + // Case 1: Fully folded to a leaf value + if (remainder === undefined) { + // The folded chain ended at a leaf (primitive, array, or empty object) + if (isJsonPrimitive(leafValue)) { + writer.push(depth, `${encodedFoldedKey}: ${encodePrimitive(leafValue, options.delimiter)}`) + return + } + else if (isJsonArray(leafValue)) { + encodeArray(foldedKey, leafValue, writer, depth, options) + return + } + else if (isJsonObject(leafValue) && Object.keys(leafValue).length === 0) { + writer.push(depth, `${encodedFoldedKey}:`) + return + } + } + + // Case 2: Partially folded with a tail object + if (isJsonObject(remainder)) { + writer.push(depth, `${encodedFoldedKey}:`) + encodeObject(remainder, writer, depth + 1, options) + return + } + } + } + + // No folding applied - use standard encoding const encodedKey = encodeKey(key) if (isJsonPrimitive(value)) { diff --git a/packages/toon/src/encode/folding.ts b/packages/toon/src/encode/folding.ts new file mode 100644 index 0000000..bafcdfe --- /dev/null +++ b/packages/toon/src/encode/folding.ts @@ -0,0 +1,173 @@ +import type { JsonValue, ResolvedEncodeOptions } from '../types' +import { DOT } from '../constants' +import { isIdentifierSegment } from '../shared/validation' +import { isJsonObject } from './normalize' + +// #region Key folding helpers + +/** + * Result of attempting to fold a key chain. 
/**
 * Outcome of a successful key-chain fold.
 */
export interface FoldResult {
  /**
   * Dot-joined path of the folded segments (e.g., "data.metadata.items")
   */
  foldedKey: string
  /**
   * What is left to encode beneath the folded key:
   * - `undefined` when the chain ended at a leaf (primitive, array, or empty object)
   * - The non-empty object the chain stopped at otherwise (multi-key object, or a
   *   single-key object cut off by the depth limit)
   */
  remainder?: JsonValue
  /**
   * The value the chain stopped at, so callers do not have to walk the chain again.
   */
  leafValue: JsonValue
}

/**
 * Tries to collapse a chain of single-key objects starting at `key` into one dotted key.
 *
 * @remarks
 * Folding is refused (returns `undefined`) when:
 * - `options.keyFolding` is not `'safe'`
 * - `value` is not an object
 * - Fewer than two segments were collected (nothing to fold)
 * - Any collected segment fails `isIdentifierSegment` - the whole chain is rejected,
 *   it is not folded up to the offending segment
 * - The dotted key equals one of the literal `siblings`
 *
 * @param key - The starting key to fold
 * @param value - The value associated with the key
 * @param siblings - All sibling keys at this level (for collision detection)
 * @param options - Resolved encoding options (`keyFolding` and `flattenDepth` are read)
 * @returns A FoldResult if folding is possible, undefined otherwise
 */
export function tryFoldKeyChain(
  key: string,
  value: JsonValue,
  siblings: readonly string[],
  options: ResolvedEncodeOptions,
): FoldResult | undefined {
  const foldingEnabled = options.keyFolding === 'safe'
  if (!foldingEnabled || !isJsonObject(value)) {
    return undefined
  }

  const chain = collectSingleKeyChain(key, value, options.flattenDepth)

  const foldable = chain.segments.length >= 2
    && chain.segments.every(seg => isIdentifierSegment(seg))
  if (!foldable) {
    return undefined
  }

  const foldedKey = buildFoldedKey(chain.segments)

  // A literal sibling with the same spelling would make the output ambiguous
  return siblings.includes(foldedKey)
    ? undefined
    : { foldedKey, remainder: chain.tail, leafValue: chain.leafValue }
}

/**
 * Walks down a chain of single-key objects and records the keys it passes.
 *
 * @remarks
 * The walk starts with `startKey` already recorded and stops as soon as:
 * - `maxDepth` segments have been recorded
 * - The current value is not an object (primitive, null, or array)
 * - The current object does not have exactly one key (empty or multi-key)
 *
 * @param startKey - The initial key of the chain
 * @param startValue - The value to traverse
 * @param maxDepth - Maximum number of segments to collect
 * @returns The recorded segments, the value the walk stopped at (`leafValue`), and
 * that same value as `tail` when it is a non-empty object (`undefined` otherwise)
 */
function collectSingleKeyChain(
  startKey: string,
  startValue: JsonValue,
  maxDepth: number,
): { segments: string[], tail: JsonValue | undefined, leafValue: JsonValue } {
  const segments: string[] = [startKey]
  let cursor: JsonValue = startValue

  while (segments.length < maxDepth && isJsonObject(cursor)) {
    const childKeys = Object.keys(cursor)
    if (childKeys.length !== 1) {
      break
    }

    const onlyKey = childKeys[0]!
    segments.push(onlyKey)
    cursor = cursor[onlyKey]!
  }

  // Only a non-empty object still has content to encode below the folded key
  const hasNestedContent = isJsonObject(cursor) && Object.keys(cursor).length > 0

  return {
    segments,
    tail: hasNestedContent ? cursor : undefined,
    leafValue: cursor,
  }
}

/**
 * Joins key segments into a dotted key.
 *
 * @param segments - Array of key segments
 * @returns Dot-separated key string
 */
function buildFoldedKey(segments: readonly string[]): string {
  return segments.join(DOT)
}

// #endregion
/**
 * Shape of a single path segment accepted by safe key folding and path expansion:
 * a letter or underscore followed by any number of letters, digits, or underscores.
 */
const IDENTIFIER_SEGMENT_PATTERN = /^[A-Z_]\w*$/i

/**
 * Tells whether `key` can serve as one segment of a folded (dotted) key.
 *
 * @remarks
 * Stricter than the unquoted-key check in that a segment may not contain a dot.
 * The empty string is rejected because a leading letter or underscore is required.
 *
 * @param key - The candidate segment
 * @returns `true` if the whole string matches the identifier pattern
 */
export function isIdentifierSegment(key: string): boolean {
  return IDENTIFIER_SEGMENT_PATTERN.test(key)
}
+ * @default 'off' + */ + keyFolding?: 'off' | 'safe' + /** + * Maximum number of segments to fold when keyFolding is enabled. + * Controls how deep the folding can go in single-key chains. + * Values 0 or 1 have no practical effect (treated as effectively disabled). + * @default Infinity + */ + flattenDepth?: number } export type ResolvedEncodeOptions = Readonly> @@ -49,6 +63,14 @@ export interface DecodeOptions { * @default true */ strict?: boolean + /** + * Enable path expansion to reconstruct dotted keys into nested objects. + * When set to 'safe', keys containing dots are expanded into nested structures + * if all segments are valid identifiers (e.g., data.metadata.items becomes nested objects). + * Pairs with keyFolding='safe' for lossless round-trips. + * @default 'off' + */ + expandPaths?: 'off' | 'safe' } export type ResolvedDecodeOptions = Readonly> diff --git a/packages/toon/test/decode.test.ts b/packages/toon/test/decode.test.ts index 0ee94c2..5534053 100644 --- a/packages/toon/test/decode.test.ts +++ b/packages/toon/test/decode.test.ts @@ -7,6 +7,7 @@ import delimiters from '@toon-format/spec/tests/fixtures/decode/delimiters.json' import indentationErrors from '@toon-format/spec/tests/fixtures/decode/indentation-errors.json' import numbers from '@toon-format/spec/tests/fixtures/decode/numbers.json' import objects from '@toon-format/spec/tests/fixtures/decode/objects.json' +import pathExpansion from '@toon-format/spec/tests/fixtures/decode/path-expansion.json' import primitives from '@toon-format/spec/tests/fixtures/decode/primitives.json' import rootForm from '@toon-format/spec/tests/fixtures/decode/root-form.json' import validationErrors from '@toon-format/spec/tests/fixtures/decode/validation-errors.json' @@ -21,6 +22,7 @@ const fixtureFiles = [ arraysPrimitive, arraysTabular, arraysNested, + pathExpansion, delimiters, whitespace, rootForm, diff --git a/packages/toon/test/encode.test.ts b/packages/toon/test/encode.test.ts index 173387e..69f2b0c 
100644 --- a/packages/toon/test/encode.test.ts +++ b/packages/toon/test/encode.test.ts @@ -5,6 +5,7 @@ import arraysObjects from '@toon-format/spec/tests/fixtures/encode/arrays-object import arraysPrimitive from '@toon-format/spec/tests/fixtures/encode/arrays-primitive.json' import arraysTabular from '@toon-format/spec/tests/fixtures/encode/arrays-tabular.json' import delimiters from '@toon-format/spec/tests/fixtures/encode/delimiters.json' +import keyFolding from '@toon-format/spec/tests/fixtures/encode/key-folding.json' import objects from '@toon-format/spec/tests/fixtures/encode/objects.json' import options from '@toon-format/spec/tests/fixtures/encode/options.json' import primitives from '@toon-format/spec/tests/fixtures/encode/primitives.json' @@ -19,6 +20,7 @@ const fixtureFiles = [ arraysTabular, arraysNested, arraysObjects, + keyFolding, delimiters, whitespace, options, @@ -48,5 +50,7 @@ function resolveEncodeOptions(options?: TestCase['options']): ResolvedEncodeOpti indent: options?.indent ?? 2, delimiter: options?.delimiter ?? DEFAULT_DELIMITER, lengthMarker: options?.lengthMarker === '#' ? '#' : false, + keyFolding: options?.keyFolding ?? 'off', + flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY, } } diff --git a/packages/toon/test/types.ts b/packages/toon/test/types.ts index 2783676..0bba111 100644 --- a/packages/toon/test/types.ts +++ b/packages/toon/test/types.ts @@ -13,8 +13,11 @@ export interface TestCase { options?: { delimiter?: ',' | '\t' | '|' indent?: number - lengthMarker?: '#' | '' + lengthMarker?: '#' strict?: boolean + keyFolding?: 'off' | 'safe' + flattenDepth?: number + expandPaths?: 'off' | 'safe' } specSection?: string note?: string