mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 15:24:10 +08:00
feat: opt-in key folding and path expansion (closes #86)
This commit is contained in:
48
README.md
48
README.md
@@ -4,7 +4,7 @@
|
||||
|
||||
[](https://github.com/toon-format/toon/actions)
|
||||
[](https://www.npmjs.com/package/@toon-format/toon)
|
||||
[](https://github.com/toon-format/spec)
|
||||
[](https://github.com/toon-format/spec)
|
||||
[](https://www.npmjs.com/package/@toon-format/toon)
|
||||
[](./LICENSE)
|
||||
|
||||
@@ -80,6 +80,7 @@ See [benchmarks](#benchmarks) for concrete comparisons across different data str
|
||||
- 🍱 **Minimal syntax:** removes redundant punctuation (braces, brackets, most quotes)
|
||||
- 📐 **Indentation-based structure:** like YAML, uses whitespace instead of braces
|
||||
- 🧺 **Tabular arrays:** declare keys once, stream data as rows
|
||||
- 🔗 **Optional key folding (v1.5):** collapses single-key wrapper chains into dotted paths (e.g., `data.metadata.items`) to reduce indentation and tokens
|
||||
|
||||
[^1]: For flat tabular data, CSV is more compact. TOON adds minimal overhead to provide explicit structure and validation that improves LLM reliability.
|
||||
|
||||
@@ -736,6 +737,9 @@ cat data.toon | npx @toon-format/cli --decode
|
||||
| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
|
||||
| `--stats` | Show token count estimates and savings (encode only) |
|
||||
| `--no-strict` | Disable strict validation when decoding |
|
||||
| `--key-folding <mode>` | Key folding mode: `off`, `safe` (default: `off`) - collapses nested chains (v1.5) |
|
||||
| `--flatten-depth <number>` | Maximum segments to fold (default: `Infinity`) - requires `--key-folding safe` (v1.5) |
|
||||
| `--expand-paths <mode>` | Path expansion mode: `off`, `safe` (default: `off`) - reconstructs dotted keys (v1.5) |
|
||||
|
||||
### Examples
|
||||
|
||||
@@ -752,6 +756,9 @@ npx @toon-format/cli data.json --delimiter "|" --length-marker -o output.toon
|
||||
# Lenient decoding (skip validation)
|
||||
npx @toon-format/cli data.toon --no-strict -o output.json
|
||||
|
||||
# Key folding for nested data (v1.5)
|
||||
npx @toon-format/cli data.json --key-folding safe -o output.toon
|
||||
|
||||
# Stdin workflows
|
||||
echo '{"name": "Ada", "age": 30}' | npx @toon-format/cli --stats
|
||||
cat large-dataset.json | npx @toon-format/cli --delimiter "\t" > output.toon
|
||||
@@ -797,6 +804,40 @@ user:
|
||||
name: Ada
|
||||
```
|
||||
|
||||
### Key Folding (Optional)
|
||||
|
||||
New in v1.5: Optionally collapse single-key wrapper chains into dotted paths to reduce tokens. Enable with `keyFolding: 'safe'`.
|
||||
|
||||
Standard nesting:
|
||||
|
||||
```
|
||||
data:
|
||||
metadata:
|
||||
items[2]: a,b
|
||||
```
|
||||
|
||||
With key folding:
|
||||
|
||||
```
|
||||
data.metadata.items[2]: a,b
|
||||
```
|
||||
|
||||
Round-trip with path expansion:
|
||||
|
||||
```ts
|
||||
import { decode, encode } from '@toon-format/toon'
|
||||
|
||||
const original = { data: { metadata: { items: ['a', 'b'] } } }
|
||||
|
||||
const toon = encode(original, { keyFolding: 'safe' })
|
||||
// → "data.metadata.items[2]: a,b"
|
||||
|
||||
const restored = decode(toon, { expandPaths: 'safe' })
|
||||
// → Matches original structure
|
||||
```
|
||||
|
||||
See §13.4 in the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md#134-key-folding-and-path-expansion) for folding rules and safety guarantees.
|
||||
|
||||
### Arrays
|
||||
|
||||
> [!TIP]
|
||||
@@ -975,6 +1016,8 @@ Converts any JSON-serializable value to TOON format.
|
||||
- `indent?: number` – Number of spaces per indentation level (default: `2`)
|
||||
- `delimiter?: ',' | '\t' | '|'` – Delimiter for array values and tabular rows (default: `','`)
|
||||
- `lengthMarker?: '#' | false` – Optional marker to prefix array lengths (default: `false`)
|
||||
- `keyFolding?: 'off' | 'safe'` – Enable key folding to collapse single-key wrapper chains into dotted paths (default: `'off'`). When `'safe'`, only valid identifier segments are folded (v1.5)
|
||||
- `flattenDepth?: number` – Maximum number of segments to fold when `keyFolding` is enabled (default: `Infinity`). Values 0-1 have no practical effect (v1.5)
|
||||
|
||||
**Returns:**
|
||||
|
||||
@@ -1096,6 +1139,7 @@ Converts a TOON-formatted string back to JavaScript values.
|
||||
- `options` – Optional decoding options:
|
||||
- `indent?: number` – Expected number of spaces per indentation level (default: `2`)
|
||||
- `strict?: boolean` – Enable strict validation (default: `true`)
|
||||
- `expandPaths?: 'off' | 'safe'` – Enable path expansion to reconstruct dotted keys into nested objects (default: `'off'`). Pairs with `keyFolding: 'safe'` for lossless round-trips (v1.5)
|
||||
|
||||
**Returns:**
|
||||
|
||||
@@ -1223,7 +1267,7 @@ Task: Return only users with role "user" as TOON. Use the same header. Set [N] t
|
||||
## Other Implementations
|
||||
|
||||
> [!NOTE]
|
||||
> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v1.4) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate implementations across any language.
|
||||
> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v1.5) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations.
|
||||
|
||||
### Official Implementations
|
||||
|
||||
|
||||
@@ -65,6 +65,9 @@ cat data.toon | toon --decode
|
||||
| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
|
||||
| `--stats` | Show token count estimates and savings (encode only) |
|
||||
| `--no-strict` | Disable strict validation when decoding |
|
||||
| `--key-folding <mode>` | Enable key folding: `off`, `safe` (default: `off`) - v1.5 |
|
||||
| `--flatten-depth <number>` | Maximum folded segment count when key folding is enabled (default: `Infinity`) - v1.5 |
|
||||
| `--expand-paths <mode>` | Enable path expansion: `off`, `safe` (default: `off`) - v1.5 |
|
||||
|
||||
## Advanced Examples
|
||||
|
||||
@@ -119,12 +122,81 @@ cat large-dataset.json | toon --delimiter "\t" > output.toon
|
||||
jq '.results' data.json | toon > filtered.toon
|
||||
```
|
||||
|
||||
### Key Folding (v1.5)
|
||||
|
||||
Collapse nested wrapper chains to reduce tokens:
|
||||
|
||||
#### Basic key folding
|
||||
|
||||
```bash
|
||||
# Encode with key folding
|
||||
toon input.json --key-folding safe -o output.toon
|
||||
```
|
||||
|
||||
For data like:
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"metadata": {
|
||||
"items": ["a", "b"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Output becomes:
|
||||
```
|
||||
data.metadata.items[2]: a,b
|
||||
```
|
||||
|
||||
Instead of:
|
||||
```
|
||||
data:
|
||||
metadata:
|
||||
items[2]: a,b
|
||||
```
|
||||
|
||||
#### Limit folding depth
|
||||
|
||||
```bash
|
||||
# Fold maximum 2 levels deep
|
||||
toon input.json --key-folding safe --flatten-depth 2 -o output.toon
|
||||
```
|
||||
|
||||
#### Path expansion on decode
|
||||
|
||||
```bash
|
||||
# Reconstruct nested structure from folded keys
|
||||
toon data.toon --expand-paths safe -o output.json
|
||||
```
|
||||
|
||||
#### Round-trip workflow
|
||||
|
||||
```bash
|
||||
# Encode with folding
|
||||
toon input.json --key-folding safe -o compressed.toon
|
||||
|
||||
# Decode with expansion (restores original structure)
|
||||
toon compressed.toon --expand-paths safe -o output.json
|
||||
|
||||
# Verify round-trip
|
||||
diff input.json output.json
|
||||
```
|
||||
|
||||
#### Combined with other options
|
||||
|
||||
```bash
|
||||
# Key folding + tab delimiter + stats
|
||||
toon data.json --key-folding safe --delimiter "\t" --stats -o output.toon
|
||||
```
|
||||
|
||||
## Why Use the CLI?
|
||||
|
||||
- **Quick conversions** between formats without writing code
|
||||
- **Token analysis** to see potential savings before sending to LLMs
|
||||
- **Pipeline integration** with existing JSON-based workflows
|
||||
- **Flexible formatting** with delimiter and indentation options
|
||||
- **Key folding (v1.5)** to collapse nested wrappers for additional token savings
|
||||
|
||||
## Related
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { DecodeOptions, Delimiter, EncodeOptions } from '../../toon/src'
|
||||
import type { DecodeOptions, EncodeOptions } from '../../toon/src'
|
||||
import type { InputSource } from './types'
|
||||
import * as fsp from 'node:fs/promises'
|
||||
import * as path from 'node:path'
|
||||
@@ -11,9 +11,11 @@ import { formatInputLabel, readInput } from './utils'
|
||||
export async function encodeToToon(config: {
|
||||
input: InputSource
|
||||
output?: string
|
||||
delimiter: Delimiter
|
||||
indent: number
|
||||
indent: NonNullable<EncodeOptions['indent']>
|
||||
delimiter: NonNullable<EncodeOptions['delimiter']>
|
||||
lengthMarker: NonNullable<EncodeOptions['lengthMarker']>
|
||||
keyFolding?: NonNullable<EncodeOptions['keyFolding']>
|
||||
flattenDepth?: number
|
||||
printStats: boolean
|
||||
}): Promise<void> {
|
||||
const jsonContent = await readInput(config.input)
|
||||
@@ -30,6 +32,8 @@ export async function encodeToToon(config: {
|
||||
delimiter: config.delimiter,
|
||||
indent: config.indent,
|
||||
lengthMarker: config.lengthMarker,
|
||||
keyFolding: config.keyFolding,
|
||||
flattenDepth: config.flattenDepth,
|
||||
}
|
||||
|
||||
const toonOutput = encode(data, encodeOptions)
|
||||
@@ -59,8 +63,9 @@ export async function encodeToToon(config: {
|
||||
export async function decodeToJson(config: {
|
||||
input: InputSource
|
||||
output?: string
|
||||
indent: number
|
||||
strict: boolean
|
||||
indent: NonNullable<DecodeOptions['indent']>
|
||||
strict: NonNullable<DecodeOptions['strict']>
|
||||
expandPaths?: NonNullable<DecodeOptions['expandPaths']>
|
||||
}): Promise<void> {
|
||||
const toonContent = await readInput(config.input)
|
||||
|
||||
@@ -69,6 +74,7 @@ export async function decodeToJson(config: {
|
||||
const decodeOptions: DecodeOptions = {
|
||||
indent: config.indent,
|
||||
strict: config.strict,
|
||||
expandPaths: config.expandPaths,
|
||||
}
|
||||
data = decode(toonContent, decodeOptions)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { CommandDef } from 'citty'
|
||||
import type { Delimiter } from '../../toon/src'
|
||||
import type { DecodeOptions, Delimiter, EncodeOptions } from '../../toon/src'
|
||||
import type { InputSource } from './types'
|
||||
import * as path from 'node:path'
|
||||
import process from 'node:process'
|
||||
@@ -51,6 +51,20 @@ export const mainCommand: CommandDef<{
|
||||
description: string
|
||||
default: true
|
||||
}
|
||||
keyFolding: {
|
||||
type: 'string'
|
||||
description: string
|
||||
default: string
|
||||
}
|
||||
flattenDepth: {
|
||||
type: 'string'
|
||||
description: string
|
||||
}
|
||||
expandPaths: {
|
||||
type: 'string'
|
||||
description: string
|
||||
default: string
|
||||
}
|
||||
stats: {
|
||||
type: 'boolean'
|
||||
description: string
|
||||
@@ -103,6 +117,20 @@ export const mainCommand: CommandDef<{
|
||||
description: 'Enable strict mode for decoding',
|
||||
default: true,
|
||||
},
|
||||
keyFolding: {
|
||||
type: 'string',
|
||||
description: 'Enable key folding: off, safe (default: off)',
|
||||
default: 'off',
|
||||
},
|
||||
flattenDepth: {
|
||||
type: 'string',
|
||||
description: 'Maximum folded segment count when key folding is enabled (default: Infinity)',
|
||||
},
|
||||
expandPaths: {
|
||||
type: 'string',
|
||||
description: 'Enable path expansion: off, safe (default: off)',
|
||||
default: 'off',
|
||||
},
|
||||
stats: {
|
||||
type: 'boolean',
|
||||
description: 'Show token statistics',
|
||||
@@ -129,6 +157,27 @@ export const mainCommand: CommandDef<{
|
||||
throw new Error(`Invalid delimiter "${delimiter}". Valid delimiters are: comma (,), tab (\\t), pipe (|)`)
|
||||
}
|
||||
|
||||
// Validate `keyFolding`
|
||||
const keyFolding = args.keyFolding || 'off'
|
||||
if (keyFolding !== 'off' && keyFolding !== 'safe') {
|
||||
throw new Error(`Invalid keyFolding value "${keyFolding}". Valid values are: off, safe`)
|
||||
}
|
||||
|
||||
// Parse and validate `flattenDepth`
|
||||
let flattenDepth: number | undefined
|
||||
if (args.flattenDepth !== undefined) {
|
||||
flattenDepth = Number.parseInt(args.flattenDepth, 10)
|
||||
if (Number.isNaN(flattenDepth) || flattenDepth < 0) {
|
||||
throw new Error(`Invalid flattenDepth value: ${args.flattenDepth}`)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate `expandPaths`
|
||||
const expandPaths = args.expandPaths || 'off'
|
||||
if (expandPaths !== 'off' && expandPaths !== 'safe') {
|
||||
throw new Error(`Invalid expandPaths value "${expandPaths}". Valid values are: off, safe`)
|
||||
}
|
||||
|
||||
const mode = detectMode(inputSource, args.encode, args.decode)
|
||||
|
||||
try {
|
||||
@@ -140,6 +189,8 @@ export const mainCommand: CommandDef<{
|
||||
indent,
|
||||
lengthMarker: args.lengthMarker === true ? '#' : false,
|
||||
printStats: args.stats === true,
|
||||
keyFolding: keyFolding as NonNullable<EncodeOptions['keyFolding']>,
|
||||
flattenDepth,
|
||||
})
|
||||
}
|
||||
else {
|
||||
@@ -148,6 +199,7 @@ export const mainCommand: CommandDef<{
|
||||
output: outputPath,
|
||||
indent,
|
||||
strict: args.strict !== false,
|
||||
expandPaths: expandPaths as NonNullable<DecodeOptions['expandPaths']>,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ export const COLON = ':'
|
||||
export const SPACE = ' '
|
||||
export const PIPE = '|'
|
||||
export const HASH = '#'
|
||||
export const DOT = '.'
|
||||
|
||||
// #endregion
|
||||
|
||||
|
||||
196
packages/toon/src/decode/expand.ts
Normal file
196
packages/toon/src/decode/expand.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
import type { JsonObject, JsonValue } from '../types'
|
||||
import { DOT } from '../constants'
|
||||
import { isJsonObject } from '../encode/normalize'
|
||||
import { isIdentifierSegment } from '../shared/validation'
|
||||
|
||||
// #region Path expansion (safe)
|
||||
|
||||
/**
 * Reports whether a deep merge between `a` and `b` is possible, which is the
 * case only when both values pass `isJsonObject`.
 *
 * @remarks
 * The type predicate narrows `a` only; callers that need `b` as an object
 * must narrow or cast it themselves.
 */
function canMerge(a: JsonValue, b: JsonValue): a is JsonObject {
  if (!isJsonObject(a)) {
    return false
  }

  return isJsonObject(b)
}
|
||||
|
||||
/**
 * Rebuilds nested objects from dotted keys (safe mode).
 *
 * @remarks
 * The value is walked recursively:
 * - Arrays are mapped element by element.
 * - Primitives are returned unchanged.
 * - Objects are rebuilt key by key, in `Object.entries` order.
 *
 * For each object key:
 * - If the key contains a dot and every dot-separated segment passes
 *   `isIdentifierSegment`, the (already expanded) value is inserted at the
 *   nested path via `insertPathSafe`, which deep-merges objects and handles
 *   conflicts (`strict=true` throws, `strict=false` lets the last write win).
 * - Otherwise the key is kept literally. The expanded value is assigned
 *   directly, replacing anything stored under that name earlier.
 *
 * @param value - The decoded value to expand
 * @param strict - Whether path conflicts throw instead of being overwritten
 * @returns A new value in which eligible dotted keys are nested objects
 * @throws TypeError if a path conflict occurs in strict mode
 */
export function expandPathsSafe(value: JsonValue, strict: boolean): JsonValue {
  if (Array.isArray(value)) {
    return value.map(element => expandPathsSafe(element, strict))
  }

  // Anything that is neither an array nor an object is a primitive
  if (!isJsonObject(value)) {
    return value
  }

  const expanded: JsonObject = {}

  for (const [rawKey, rawValue] of Object.entries(value)) {
    const segments = rawKey.includes(DOT) ? rawKey.split(DOT) : undefined
    const isExpandable = segments !== undefined
      && segments.every(segment => isIdentifierSegment(segment))

    // Nested values are expanded in either case
    const child = expandPathsSafe(rawValue, strict)

    if (isExpandable) {
      insertPathSafe(expanded, segments, child, strict)
    }
    else {
      // Not eligible for expansion: keep the literal key
      expanded[rawKey] = child
    }
  }

  return expanded
}
|
||||
|
||||
/**
 * Inserts `value` into `target` at the nested location described by
 * `segments`, creating intermediate objects as needed.
 *
 * @remarks
 * Only own properties of the objects along the path are treated as existing
 * values. Inherited members (for example what `obj['__proto__']` resolves to
 * on an ordinary object) are ignored, so a path such as `__proto__.x` creates
 * an own `__proto__` property instead of walking into, and writing onto, a
 * shared prototype object.
 *
 * When an own value already exists at a step:
 * - Intermediate segment holding an object: descend into it.
 * - Final segment where both old and new values are objects: deep merge.
 * - Anything else is a conflict:
 *   - `strict=true`: throw TypeError
 *   - `strict=false`: overwrite (last write wins)
 *
 * @param target - The object to insert into (mutated in place)
 * @param segments - Path segments (e.g., ['data', 'metadata', 'items'])
 * @param value - The value to place at the end of the path
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function insertPathSafe(
  target: JsonObject,
  segments: readonly string[],
  value: JsonValue,
  strict: boolean,
): void {
  // Without any segment there is no location to write to
  if (segments.length === 0) {
    return
  }

  // Own-property lookup: plain `holder[key]` would also return inherited members
  const readOwn = (holder: JsonObject, key: string): JsonValue | undefined =>
    Object.prototype.hasOwnProperty.call(holder, key) ? holder[key] : undefined

  // Assigning to `__proto__` on an ordinary object goes through the inherited
  // setter (changing the prototype) rather than storing a key, so that one
  // name is defined explicitly as a regular own data property.
  const writeOwn = (holder: JsonObject, key: string, next: JsonValue): void => {
    if (key === '__proto__') {
      Object.defineProperty(holder, key, {
        value: next,
        writable: true,
        enumerable: true,
        configurable: true,
      })
    }
    else {
      holder[key] = next
    }
  }

  // `typeof` alone reports 'object' for both null and arrays
  const describe = (candidate: JsonValue): string => {
    if (candidate === null) {
      return 'null'
    }
    return Array.isArray(candidate) ? 'array' : typeof candidate
  }

  let current: JsonObject = target

  // Walk to the penultimate segment, creating objects as needed
  for (let i = 0; i < segments.length - 1; i++) {
    const seg = segments[i]!
    const existing = readOwn(current, seg)

    if (existing !== undefined && isJsonObject(existing)) {
      // Continue into the existing object
      current = existing
      continue
    }

    if (existing !== undefined && strict) {
      // Conflict: something that is not an object already lives here
      throw new TypeError(
        `Path expansion conflict at segment "${seg}": expected object but found ${describe(existing)}`,
      )
    }

    // Missing, or non-strict conflict: (re)place with a fresh intermediate object
    const created: JsonObject = {}
    writeOwn(current, seg, created)
    current = created
  }

  // Insert at the final segment
  const lastSeg = segments[segments.length - 1]!
  const existing = readOwn(current, lastSeg)

  if (existing === undefined) {
    // No conflict - insert directly
    writeOwn(current, lastSeg, value)
  }
  else if (canMerge(existing, value)) {
    // Both are objects - deep merge
    mergeObjects(existing, value as JsonObject, strict)
  }
  else {
    // Conflict: incompatible types
    if (strict) {
      throw new TypeError(
        `Path expansion conflict at key "${lastSeg}": cannot merge ${describe(existing)} with ${describe(value)}`,
      )
    }
    // Non-strict: overwrite (LWW)
    writeOwn(current, lastSeg, value)
  }
}
|
||||
|
||||
/**
 * Deep merges the own enumerable properties of `source` into `target`.
 *
 * @remarks
 * For each key in `source`:
 * - If `target` has no own value for the key: copy the source value (by reference)
 * - If both values are objects: merge recursively
 * - Otherwise: conflict (`strict=true` throws, `strict=false` overwrites)
 *
 * Existing values are looked up as own properties only. For a key such as
 * `__proto__`, `target[key]` on an ordinary object would otherwise resolve to
 * the inherited prototype object, and merging into that would modify an
 * object shared far beyond `target`.
 *
 * @param target - The object to merge into (mutated in place)
 * @param source - The object to merge from
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function mergeObjects(
  target: JsonObject,
  source: JsonObject,
  strict: boolean,
): void {
  // `typeof` alone reports 'object' for both null and arrays
  const describe = (candidate: JsonValue): string => {
    if (candidate === null) {
      return 'null'
    }
    return Array.isArray(candidate) ? 'array' : typeof candidate
  }

  for (const key of Object.keys(source)) {
    const sourceValue = source[key]!
    const targetValue = Object.prototype.hasOwnProperty.call(target, key)
      ? target[key]
      : undefined

    if (targetValue !== undefined && canMerge(targetValue, sourceValue)) {
      // Both are objects - recursively merge
      mergeObjects(targetValue, sourceValue as JsonObject, strict)
      continue
    }

    if (targetValue !== undefined && strict) {
      // Conflict: incompatible types
      throw new TypeError(
        `Path expansion conflict at key "${key}": cannot merge ${describe(targetValue)} with ${describe(sourceValue)}`,
      )
    }

    // Key is new, or non-strict conflict: store the source value (LWW).
    // Assigning to `__proto__` would hit the inherited setter instead of
    // storing a key, so that name is defined explicitly as own data.
    if (key === '__proto__') {
      Object.defineProperty(target, key, {
        value: sourceValue,
        writable: true,
        enumerable: true,
        configurable: true,
      })
    }
    else {
      target[key] = sourceValue
    }
  }
}
|
||||
|
||||
// #endregion
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types'
|
||||
import { LIST_ITEM_MARKER } from '../constants'
|
||||
import { tryFoldKeyChain } from './folding'
|
||||
import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize'
|
||||
import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives'
|
||||
import { LineWriter } from './writer'
|
||||
@@ -31,11 +32,46 @@ export function encodeObject(value: JsonObject, writer: LineWriter, depth: Depth
|
||||
const keys = Object.keys(value)
|
||||
|
||||
for (const key of keys) {
|
||||
encodeKeyValuePair(key, value[key]!, writer, depth, options)
|
||||
encodeKeyValuePair(key, value[key]!, writer, depth, options, keys)
|
||||
}
|
||||
}
|
||||
|
||||
export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions): void {
|
||||
export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions, siblings?: readonly string[]): void {
|
||||
// Attempt key folding when enabled
|
||||
if (options.keyFolding === 'safe' && siblings) {
|
||||
const foldResult = tryFoldKeyChain(key, value, siblings, options)
|
||||
|
||||
if (foldResult) {
|
||||
const { foldedKey, remainder, leafValue } = foldResult
|
||||
const encodedFoldedKey = encodeKey(foldedKey)
|
||||
|
||||
// Case 1: Fully folded to a leaf value
|
||||
if (remainder === undefined) {
|
||||
// The folded chain ended at a leaf (primitive, array, or empty object)
|
||||
if (isJsonPrimitive(leafValue)) {
|
||||
writer.push(depth, `${encodedFoldedKey}: ${encodePrimitive(leafValue, options.delimiter)}`)
|
||||
return
|
||||
}
|
||||
else if (isJsonArray(leafValue)) {
|
||||
encodeArray(foldedKey, leafValue, writer, depth, options)
|
||||
return
|
||||
}
|
||||
else if (isJsonObject(leafValue) && Object.keys(leafValue).length === 0) {
|
||||
writer.push(depth, `${encodedFoldedKey}:`)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Case 2: Partially folded with a tail object
|
||||
if (isJsonObject(remainder)) {
|
||||
writer.push(depth, `${encodedFoldedKey}:`)
|
||||
encodeObject(remainder, writer, depth + 1, options)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No folding applied - use standard encoding
|
||||
const encodedKey = encodeKey(key)
|
||||
|
||||
if (isJsonPrimitive(value)) {
|
||||
|
||||
173
packages/toon/src/encode/folding.ts
Normal file
173
packages/toon/src/encode/folding.ts
Normal file
@@ -0,0 +1,173 @@
|
||||
import type { JsonValue, ResolvedEncodeOptions } from '../types'
|
||||
import { DOT } from '../constants'
|
||||
import { isIdentifierSegment } from '../shared/validation'
|
||||
import { isJsonObject } from './normalize'
|
||||
|
||||
// #region Key folding helpers
|
||||
|
||||
/**
 * Outcome of a successful key-folding attempt.
 */
export interface FoldResult {
  /**
   * Segments of the collapsed chain joined with dots (e.g., "data.metadata.items").
   */
  foldedKey: string
  /**
   * What is left to encode beneath the folded key:
   * - `undefined` when the chain ended in a leaf (non-object value or empty object)
   * - The non-empty object at which folding stopped otherwise
   */
  remainder?: JsonValue
  /**
   * The value found at the end of the folded chain. When `remainder` is
   * defined, this is the same value; it is exposed so the encoder does not
   * have to walk the chain again.
   */
  leafValue: JsonValue
}
|
||||
|
||||
/**
 * Tries to collapse a chain of single-key objects starting at `key` into one
 * dotted key.
 *
 * @remarks
 * Folding is refused (the function returns `undefined`) when any of these holds:
 * - `options.keyFolding` is not `'safe'`
 * - `value` is not an object
 * - Fewer than two segments were collected (the chain collector is bounded by
 *   `options.flattenDepth`)
 * - Some collected segment fails `isIdentifierSegment`
 * - The resulting dotted key equals one of the literal `siblings`
 *
 * @param key - The starting key to fold
 * @param value - The value associated with the key
 * @param siblings - All keys present at the same level (for collision detection)
 * @param options - Resolved encoding options
 * @returns A FoldResult if folding is possible, undefined otherwise
 */
export function tryFoldKeyChain(
  key: string,
  value: JsonValue,
  siblings: readonly string[],
  options: ResolvedEncodeOptions,
): FoldResult | undefined {
  // Folding needs safe mode and an object to descend into
  const foldingEnabled = options.keyFolding === 'safe'
  if (!foldingEnabled || !isJsonObject(value)) {
    return undefined
  }

  const chain = collectSingleKeyChain(key, value, options.flattenDepth)

  // A single segment is just the original key - nothing gained by folding
  if (chain.segments.length < 2) {
    return undefined
  }

  // Every segment has to be a plain identifier
  for (const segment of chain.segments) {
    if (!isIdentifierSegment(segment)) {
      return undefined
    }
  }

  const foldedKey = buildFoldedKey(chain.segments)

  // A literal sibling with the same spelling would make the output ambiguous
  const collides = siblings.some(sibling => sibling === foldedKey)
  if (collides) {
    return undefined
  }

  return {
    foldedKey,
    remainder: chain.tail,
    leafValue: chain.leafValue,
  }
}
|
||||
|
||||
/**
 * Follows a chain of single-key objects and records the keys along the way.
 *
 * @remarks
 * Starting with `startKey`, the walk descends while the current value is an
 * object with exactly one key and fewer than `maxDepth` segments have been
 * collected. It therefore stops at arrays, primitives, empty objects,
 * multi-key objects, or the depth limit.
 *
 * The value at which the walk stopped is always returned as `leafValue`. It is
 * also returned as `tail` when it is a non-empty object (more structure is
 * left to encode). For any other value `tail` is `undefined`.
 *
 * @param startKey - The initial key to start the chain
 * @param startValue - The value to traverse
 * @param maxDepth - Maximum number of segments to collect
 * @returns Object containing segments array, tail value, and leaf value
 */
function collectSingleKeyChain(
  startKey: string,
  startValue: JsonValue,
  maxDepth: number,
): { segments: string[], tail: JsonValue | undefined, leafValue: JsonValue } {
  const segments: string[] = [startKey]
  let cursor: JsonValue = startValue

  while (segments.length < maxDepth && isJsonObject(cursor)) {
    const entries = Object.entries(cursor)

    // Only a lone key continues the chain
    if (entries.length !== 1) {
      break
    }

    const [onlyKey, onlyValue] = entries[0]!
    segments.push(onlyKey)
    cursor = onlyValue
  }

  // Non-objects and empty objects are leaves; any other object is the tail
  const endsInLeaf = !isJsonObject(cursor) || Object.keys(cursor).length === 0

  return {
    segments,
    tail: endsInLeaf ? undefined : cursor,
    leafValue: cursor,
  }
}
|
||||
|
||||
/**
 * Joins key segments into a single folded key, using `DOT` as the separator.
 *
 * @param segments - Key segments in path order
 * @returns The segments joined by `DOT`
 */
function buildFoldedKey(segments: readonly string[]): string {
  const dottedPath = segments.join(DOT)
  return dottedPath
}
|
||||
|
||||
// #endregion
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
|
||||
import { DEFAULT_DELIMITER } from './constants'
|
||||
import { decodeValueFromLines } from './decode/decoders'
|
||||
import { expandPathsSafe } from './decode/expand'
|
||||
import { LineCursor, toParsedLines } from './decode/scanner'
|
||||
import { encodeValue } from './encode/encoders'
|
||||
import { normalizeValue } from './encode/normalize'
|
||||
@@ -34,7 +35,14 @@ export function decode(input: string, options?: DecodeOptions): JsonValue {
|
||||
}
|
||||
|
||||
const cursor = new LineCursor(scanResult.lines, scanResult.blankLines)
|
||||
return decodeValueFromLines(cursor, resolvedOptions)
|
||||
const value = decodeValueFromLines(cursor, resolvedOptions)
|
||||
|
||||
// Apply path expansion if enabled
|
||||
if (resolvedOptions.expandPaths === 'safe') {
|
||||
return expandPathsSafe(value, resolvedOptions.strict)
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
||||
@@ -42,6 +50,8 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
||||
indent: options?.indent ?? 2,
|
||||
delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
|
||||
lengthMarker: options?.lengthMarker ?? false,
|
||||
keyFolding: options?.keyFolding ?? 'off',
|
||||
flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,5 +59,6 @@ function resolveDecodeOptions(options?: DecodeOptions): ResolvedDecodeOptions {
|
||||
return {
|
||||
indent: options?.indent ?? 2,
|
||||
strict: options?.strict ?? true,
|
||||
expandPaths: options?.expandPaths ?? 'off',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,19 @@ export function isValidUnquotedKey(key: string): boolean {
|
||||
return /^[A-Z_][\w.]*$/i.test(key)
|
||||
}
|
||||
|
||||
/**
 * Pattern for one path segment: a letter or underscore first, then any number
 * of letters, digits, or underscores. Dots are not allowed.
 */
const IDENTIFIER_SEGMENT_PATTERN = /^[A-Z_]\w*$/i

/**
 * Tells whether `key` can serve as a single segment in safe key folding and
 * path expansion.
 *
 * @remarks
 * A segment qualifies when it:
 * - Starts with a letter or an underscore
 * - Continues with letters, digits, or underscores only
 * - Contains no dot (so it cannot be mistaken for several segments)
 */
export function isIdentifierSegment(key: string): boolean {
  return IDENTIFIER_SEGMENT_PATTERN.test(key)
}
|
||||
|
||||
/**
|
||||
* Determines if a string value can be safely encoded without quotes.
|
||||
*
|
||||
|
||||
@@ -30,6 +30,20 @@ export interface EncodeOptions {
|
||||
* @default false
|
||||
*/
|
||||
lengthMarker?: '#' | false
|
||||
/**
|
||||
* Enable key folding to collapse single-key wrapper chains.
|
||||
* When set to 'safe', nested objects with single keys are collapsed into dotted paths
|
||||
* (e.g., data.metadata.items instead of nested indentation).
|
||||
* @default 'off'
|
||||
*/
|
||||
keyFolding?: 'off' | 'safe'
|
||||
/**
|
||||
* Maximum number of segments to fold when keyFolding is enabled.
|
||||
* Controls how deep the folding can go in single-key chains.
|
||||
* Values 0 or 1 have no practical effect (treated as effectively disabled).
|
||||
* @default Infinity
|
||||
*/
|
||||
flattenDepth?: number
|
||||
}
|
||||
|
||||
export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>>
|
||||
@@ -49,6 +63,14 @@ export interface DecodeOptions {
|
||||
* @default true
|
||||
*/
|
||||
strict?: boolean
|
||||
/**
|
||||
* Enable path expansion to reconstruct dotted keys into nested objects.
|
||||
* When set to 'safe', keys containing dots are expanded into nested structures
|
||||
* if all segments are valid identifiers (e.g., data.metadata.items becomes nested objects).
|
||||
* Pairs with keyFolding='safe' for lossless round-trips.
|
||||
* @default 'off'
|
||||
*/
|
||||
expandPaths?: 'off' | 'safe'
|
||||
}
|
||||
|
||||
export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>
|
||||
|
||||
@@ -7,6 +7,7 @@ import delimiters from '@toon-format/spec/tests/fixtures/decode/delimiters.json'
|
||||
import indentationErrors from '@toon-format/spec/tests/fixtures/decode/indentation-errors.json'
|
||||
import numbers from '@toon-format/spec/tests/fixtures/decode/numbers.json'
|
||||
import objects from '@toon-format/spec/tests/fixtures/decode/objects.json'
|
||||
import pathExpansion from '@toon-format/spec/tests/fixtures/decode/path-expansion.json'
|
||||
import primitives from '@toon-format/spec/tests/fixtures/decode/primitives.json'
|
||||
import rootForm from '@toon-format/spec/tests/fixtures/decode/root-form.json'
|
||||
import validationErrors from '@toon-format/spec/tests/fixtures/decode/validation-errors.json'
|
||||
@@ -21,6 +22,7 @@ const fixtureFiles = [
|
||||
arraysPrimitive,
|
||||
arraysTabular,
|
||||
arraysNested,
|
||||
pathExpansion,
|
||||
delimiters,
|
||||
whitespace,
|
||||
rootForm,
|
||||
|
||||
@@ -5,6 +5,7 @@ import arraysObjects from '@toon-format/spec/tests/fixtures/encode/arrays-object
|
||||
import arraysPrimitive from '@toon-format/spec/tests/fixtures/encode/arrays-primitive.json'
|
||||
import arraysTabular from '@toon-format/spec/tests/fixtures/encode/arrays-tabular.json'
|
||||
import delimiters from '@toon-format/spec/tests/fixtures/encode/delimiters.json'
|
||||
import keyFolding from '@toon-format/spec/tests/fixtures/encode/key-folding.json'
|
||||
import objects from '@toon-format/spec/tests/fixtures/encode/objects.json'
|
||||
import options from '@toon-format/spec/tests/fixtures/encode/options.json'
|
||||
import primitives from '@toon-format/spec/tests/fixtures/encode/primitives.json'
|
||||
@@ -19,6 +20,7 @@ const fixtureFiles = [
|
||||
arraysTabular,
|
||||
arraysNested,
|
||||
arraysObjects,
|
||||
keyFolding,
|
||||
delimiters,
|
||||
whitespace,
|
||||
options,
|
||||
@@ -48,5 +50,7 @@ function resolveEncodeOptions(options?: TestCase['options']): ResolvedEncodeOpti
|
||||
indent: options?.indent ?? 2,
|
||||
delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
|
||||
lengthMarker: options?.lengthMarker === '#' ? '#' : false,
|
||||
keyFolding: options?.keyFolding ?? 'off',
|
||||
flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,8 +13,11 @@ export interface TestCase {
|
||||
options?: {
|
||||
delimiter?: ',' | '\t' | '|'
|
||||
indent?: number
|
||||
lengthMarker?: '#' | ''
|
||||
lengthMarker?: '#'
|
||||
strict?: boolean
|
||||
keyFolding?: 'off' | 'safe'
|
||||
flattenDepth?: number
|
||||
expandPaths?: 'off' | 'safe'
|
||||
}
|
||||
specSection?: string
|
||||
note?: string
|
||||
|
||||
Reference in New Issue
Block a user