mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
feat!: remove optional length marker option [#N] in favor of [N]
This commit is contained in:
51
README.md
51
README.md
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
[](https://github.com/toon-format/toon/actions)
|
[](https://github.com/toon-format/toon/actions)
|
||||||
[](https://www.npmjs.com/package/@toon-format/toon)
|
[](https://www.npmjs.com/package/@toon-format/toon)
|
||||||
[](https://github.com/toon-format/spec)
|
[](https://github.com/toon-format/spec)
|
||||||
[](https://www.npmjs.com/package/@toon-format/toon)
|
[](https://www.npmjs.com/package/@toon-format/toon)
|
||||||
[](./LICENSE)
|
[](./LICENSE)
|
||||||
|
|
||||||
@@ -67,7 +67,7 @@ TOON excels with uniform arrays of objects, but there are cases where other form
|
|||||||
|
|
||||||
- **Deeply nested or non-uniform structures** (tabular eligibility ≈ 0%): JSON-compact often uses fewer tokens. Example: complex configuration objects with many nested levels.
|
- **Deeply nested or non-uniform structures** (tabular eligibility ≈ 0%): JSON-compact often uses fewer tokens. Example: complex configuration objects with many nested levels.
|
||||||
- **Semi-uniform arrays** (~40–60% tabular eligibility): Token savings diminish. Prefer JSON if your pipelines already rely on it.
|
- **Semi-uniform arrays** (~40–60% tabular eligibility): Token savings diminish. Prefer JSON if your pipelines already rely on it.
|
||||||
- **Flat CSV use-cases**: CSV is smaller than TOON for pure tabular data. TOON adds minimal overhead (~5-10%) to provide structure (length markers, field headers, delimiter scoping) that improves LLM reliability.
|
- **Flat CSV use-cases**: CSV is smaller than TOON for pure tabular data. TOON adds minimal overhead (~5-10%) to provide structure (array length declarations, field headers, delimiter scoping) that improves LLM reliability.
|
||||||
|
|
||||||
See [benchmarks](#benchmarks) for concrete comparisons across different data structures.
|
See [benchmarks](#benchmarks) for concrete comparisons across different data structures.
|
||||||
|
|
||||||
@@ -80,7 +80,7 @@ See [benchmarks](#benchmarks) for concrete comparisons across different data str
|
|||||||
- 🍱 **Minimal syntax:** removes redundant punctuation (braces, brackets, most quotes)
|
- 🍱 **Minimal syntax:** removes redundant punctuation (braces, brackets, most quotes)
|
||||||
- 📐 **Indentation-based structure:** like YAML, uses whitespace instead of braces
|
- 📐 **Indentation-based structure:** like YAML, uses whitespace instead of braces
|
||||||
- 🧺 **Tabular arrays:** declare keys once, stream data as rows
|
- 🧺 **Tabular arrays:** declare keys once, stream data as rows
|
||||||
- 🔗 **Optional key folding (spec v1.5):** collapses single-key wrapper chains into dotted paths (e.g., `data.metadata.items`) to reduce indentation and tokens
|
- 🔗 **Optional key folding:** collapses single-key wrapper chains into dotted paths (e.g., `data.metadata.items`) to reduce indentation and tokens
|
||||||
|
|
||||||
[^1]: For flat tabular data, CSV is more compact. TOON adds minimal overhead to provide explicit structure and validation that improves LLM reliability.
|
[^1]: For flat tabular data, CSV is more compact. TOON adds minimal overhead to provide explicit structure and validation that improves LLM reliability.
|
||||||
|
|
||||||
@@ -734,7 +734,6 @@ cat data.toon | npx @toon-format/cli --decode
|
|||||||
| `-d, --decode` | Force decode mode (overrides auto-detection) |
|
| `-d, --decode` | Force decode mode (overrides auto-detection) |
|
||||||
| `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) |
|
| `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) |
|
||||||
| `--indent <number>` | Indentation size (default: `2`) |
|
| `--indent <number>` | Indentation size (default: `2`) |
|
||||||
| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
|
|
||||||
| `--stats` | Show token count estimates and savings (encode only) |
|
| `--stats` | Show token count estimates and savings (encode only) |
|
||||||
| `--no-strict` | Disable strict validation when decoding |
|
| `--no-strict` | Disable strict validation when decoding |
|
||||||
| `--key-folding <mode>` | Key folding mode: `off`, `safe` (default: `off`) - collapses nested chains |
|
| `--key-folding <mode>` | Key folding mode: `off`, `safe` (default: `off`) - collapses nested chains |
|
||||||
@@ -750,13 +749,13 @@ npx @toon-format/cli data.json --stats -o output.toon
|
|||||||
# Tab-separated output (often more token-efficient)
|
# Tab-separated output (often more token-efficient)
|
||||||
npx @toon-format/cli data.json --delimiter "\t" -o output.toon
|
npx @toon-format/cli data.json --delimiter "\t" -o output.toon
|
||||||
|
|
||||||
# Pipe-separated with length markers
|
# Pipe-separated output
|
||||||
npx @toon-format/cli data.json --delimiter "|" --length-marker -o output.toon
|
npx @toon-format/cli data.json --delimiter "|" -o output.toon
|
||||||
|
|
||||||
# Lenient decoding (skip validation)
|
# Lenient decoding (skip validation)
|
||||||
npx @toon-format/cli data.toon --no-strict -o output.json
|
npx @toon-format/cli data.toon --no-strict -o output.json
|
||||||
|
|
||||||
# Key folding for nested data (spec v1.5)
|
# Key folding for nested data
|
||||||
npx @toon-format/cli data.json --key-folding safe -o output.toon
|
npx @toon-format/cli data.json --key-folding safe -o output.toon
|
||||||
|
|
||||||
# Stdin workflows
|
# Stdin workflows
|
||||||
@@ -1015,7 +1014,6 @@ Converts any JSON-serializable value to TOON format.
|
|||||||
- `options` – Optional encoding options:
|
- `options` – Optional encoding options:
|
||||||
- `indent?: number` – Number of spaces per indentation level (default: `2`)
|
- `indent?: number` – Number of spaces per indentation level (default: `2`)
|
||||||
- `delimiter?: ',' | '\t' | '|'` – Delimiter for array values and tabular rows (default: `','`)
|
- `delimiter?: ',' | '\t' | '|'` – Delimiter for array values and tabular rows (default: `','`)
|
||||||
- `lengthMarker?: '#' | false` – Optional marker to prefix array lengths (default: `false`)
|
|
||||||
- `keyFolding?: 'off' | 'safe'` – Enable key folding to collapse single-key wrapper chains into dotted paths (default: `'off'`). When `'safe'`, only valid identifier segments are folded
|
- `keyFolding?: 'off' | 'safe'` – Enable key folding to collapse single-key wrapper chains into dotted paths (default: `'off'`). When `'safe'`, only valid identifier segments are folded
|
||||||
- `flattenDepth?: number` – Maximum number of segments to fold when `keyFolding` is enabled (default: `Infinity`). Values 0-1 have no practical effect
|
- `flattenDepth?: number` – Maximum number of segments to fold when `keyFolding` is enabled (default: `Infinity`). Values 0-1 have no practical effect
|
||||||
|
|
||||||
@@ -1098,37 +1096,6 @@ items[2|]{sku|name|qty|price}:
|
|||||||
B2|Gadget|1|14.5
|
B2|Gadget|1|14.5
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Length Marker Option
|
|
||||||
|
|
||||||
The `lengthMarker` option adds an optional hash (`#`) prefix to array lengths to emphasize that the bracketed value represents a count, not an index:
|
|
||||||
|
|
||||||
```ts
|
|
||||||
const data = {
|
|
||||||
tags: ['reading', 'gaming', 'coding'],
|
|
||||||
items: [
|
|
||||||
{ sku: 'A1', qty: 2, price: 9.99 },
|
|
||||||
{ sku: 'B2', qty: 1, price: 14.5 },
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(
|
|
||||||
encode(data, { lengthMarker: '#' })
|
|
||||||
)
|
|
||||||
// tags[#3]: reading,gaming,coding
|
|
||||||
// items[#2]{sku,qty,price}:
|
|
||||||
// A1,2,9.99
|
|
||||||
// B2,1,14.5
|
|
||||||
|
|
||||||
// Custom delimiter with length marker
|
|
||||||
console.log(
|
|
||||||
encode(data, { lengthMarker: '#', delimiter: '|' })
|
|
||||||
)
|
|
||||||
// tags[#3|]: reading|gaming|coding
|
|
||||||
// items[#2|]{sku|qty|price}:
|
|
||||||
// A1|2|9.99
|
|
||||||
// B2|1|14.5
|
|
||||||
```
|
|
||||||
|
|
||||||
### `decode(input: string, options?: DecodeOptions): JsonValue`
|
### `decode(input: string, options?: DecodeOptions): JsonValue`
|
||||||
|
|
||||||
Converts a TOON-formatted string back to JavaScript values.
|
Converts a TOON-formatted string back to JavaScript values.
|
||||||
@@ -1179,7 +1146,7 @@ By default, the decoder validates input strictly:
|
|||||||
- Format familiarity and structure matter as much as token count. TOON's tabular format requires arrays of objects with identical keys and primitive values only. When this doesn't hold (due to mixed types, non-uniform objects, or nested structures), TOON switches to list format where JSON can be more efficient at scale.
|
- Format familiarity and structure matter as much as token count. TOON's tabular format requires arrays of objects with identical keys and primitive values only. When this doesn't hold (due to mixed types, non-uniform objects, or nested structures), TOON switches to list format where JSON can be more efficient at scale.
|
||||||
- **TOON excels at:** Uniform arrays of objects (same fields, primitive values), especially large datasets with consistent structure.
|
- **TOON excels at:** Uniform arrays of objects (same fields, primitive values), especially large datasets with consistent structure.
|
||||||
- **JSON is better for:** Non-uniform data, deeply nested structures, and objects with varying field sets.
|
- **JSON is better for:** Non-uniform data, deeply nested structures, and objects with varying field sets.
|
||||||
- **CSV is more compact for:** Flat, uniform tables without nesting. TOON adds structure (`[N]` length markers, delimiter scoping, deterministic quoting) that improves LLM reliability with minimal token overhead.
|
- **CSV is more compact for:** Flat, uniform tables without nesting. TOON adds structure (`[N]` array lengths, delimiter scoping, deterministic quoting) that improves LLM reliability with minimal token overhead.
|
||||||
- **Token counts vary by tokenizer and model.** Benchmarks use a GPT-style tokenizer (cl100k/o200k); actual savings will differ with other models (e.g., [SentencePiece](https://github.com/google/sentencepiece)).
|
- **Token counts vary by tokenizer and model.** Benchmarks use a GPT-style tokenizer (cl100k/o200k); actual savings will differ with other models (e.g., [SentencePiece](https://github.com/google/sentencepiece)).
|
||||||
- **TOON is designed for LLM input** where human readability and token efficiency matter. It's **not** a drop-in replacement for JSON in APIs or storage.
|
- **TOON is designed for LLM input** where human readability and token efficiency matter. It's **not** a drop-in replacement for JSON in APIs or storage.
|
||||||
|
|
||||||
@@ -1189,7 +1156,7 @@ TOON works best when you show the format instead of describing it. The structure
|
|||||||
|
|
||||||
### Sending TOON to LLMs (Input)
|
### Sending TOON to LLMs (Input)
|
||||||
|
|
||||||
Wrap your encoded data in a fenced code block (label it \`\`\`toon for clarity). The indentation and headers are usually enough – models treat it like familiar YAML or CSV. The explicit length markers (`[N]`) and field headers (`{field1,field2}`) help the model track structure, especially for large tables.
|
Wrap your encoded data in a fenced code block (label it \`\`\`toon for clarity). The indentation and headers are usually enough – models treat it like familiar YAML or CSV. The explicit array lengths (`[N]`) and field headers (`{field1,field2}`) help the model track structure, especially for large tables.
|
||||||
|
|
||||||
### Generating TOON from LLMs (Output)
|
### Generating TOON from LLMs (Output)
|
||||||
|
|
||||||
@@ -1267,7 +1234,7 @@ Task: Return only users with role "user" as TOON. Use the same header. Set [N] t
|
|||||||
## Other Implementations
|
## Other Implementations
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v1.5) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations.
|
> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v2.0) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations.
|
||||||
|
|
||||||
### Official Implementations
|
### Official Implementations
|
||||||
|
|
||||||
|
|||||||
2
SPEC.md
2
SPEC.md
@@ -4,7 +4,7 @@ The TOON specification has moved to a dedicated repository: [github.com/toon-for
|
|||||||
|
|
||||||
## Current Version
|
## Current Version
|
||||||
|
|
||||||
**Version 1.4** (2025-11-05)
|
**Version 2.0** (2025-11-10)
|
||||||
|
|
||||||
## Quick Links
|
## Quick Links
|
||||||
|
|
||||||
|
|||||||
@@ -62,7 +62,6 @@ cat data.toon | toon --decode
|
|||||||
| `-d, --decode` | Force decode mode (overrides auto-detection) |
|
| `-d, --decode` | Force decode mode (overrides auto-detection) |
|
||||||
| `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) |
|
| `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) |
|
||||||
| `--indent <number>` | Indentation size (default: `2`) |
|
| `--indent <number>` | Indentation size (default: `2`) |
|
||||||
| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
|
|
||||||
| `--stats` | Show token count estimates and savings (encode only) |
|
| `--stats` | Show token count estimates and savings (encode only) |
|
||||||
| `--no-strict` | Disable strict validation when decoding |
|
| `--no-strict` | Disable strict validation when decoding |
|
||||||
| `--key-folding <mode>` | Enable key folding: `off`, `safe` (default: `off`) |
|
| `--key-folding <mode>` | Enable key folding: `off`, `safe` (default: `off`) |
|
||||||
@@ -122,7 +121,7 @@ cat large-dataset.json | toon --delimiter "\t" > output.toon
|
|||||||
jq '.results' data.json | toon > filtered.toon
|
jq '.results' data.json | toon > filtered.toon
|
||||||
```
|
```
|
||||||
|
|
||||||
### Key Folding (spec v1.5)
|
### Key Folding (Since v1.5)
|
||||||
|
|
||||||
Collapse nested wrapper chains to reduce tokens:
|
Collapse nested wrapper chains to reduce tokens:
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ export async function encodeToToon(config: {
|
|||||||
output?: string
|
output?: string
|
||||||
indent: NonNullable<EncodeOptions['indent']>
|
indent: NonNullable<EncodeOptions['indent']>
|
||||||
delimiter: NonNullable<EncodeOptions['delimiter']>
|
delimiter: NonNullable<EncodeOptions['delimiter']>
|
||||||
lengthMarker: NonNullable<EncodeOptions['lengthMarker']>
|
|
||||||
keyFolding?: NonNullable<EncodeOptions['keyFolding']>
|
keyFolding?: NonNullable<EncodeOptions['keyFolding']>
|
||||||
flattenDepth?: number
|
flattenDepth?: number
|
||||||
printStats: boolean
|
printStats: boolean
|
||||||
@@ -31,7 +30,6 @@ export async function encodeToToon(config: {
|
|||||||
const encodeOptions: EncodeOptions = {
|
const encodeOptions: EncodeOptions = {
|
||||||
delimiter: config.delimiter,
|
delimiter: config.delimiter,
|
||||||
indent: config.indent,
|
indent: config.indent,
|
||||||
lengthMarker: config.lengthMarker,
|
|
||||||
keyFolding: config.keyFolding,
|
keyFolding: config.keyFolding,
|
||||||
flattenDepth: config.flattenDepth,
|
flattenDepth: config.flattenDepth,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,11 +41,6 @@ export const mainCommand: CommandDef<{
|
|||||||
description: string
|
description: string
|
||||||
default: string
|
default: string
|
||||||
}
|
}
|
||||||
lengthMarker: {
|
|
||||||
type: 'boolean'
|
|
||||||
description: string
|
|
||||||
default: false
|
|
||||||
}
|
|
||||||
strict: {
|
strict: {
|
||||||
type: 'boolean'
|
type: 'boolean'
|
||||||
description: string
|
description: string
|
||||||
@@ -107,11 +102,6 @@ export const mainCommand: CommandDef<{
|
|||||||
description: 'Indentation size',
|
description: 'Indentation size',
|
||||||
default: '2',
|
default: '2',
|
||||||
},
|
},
|
||||||
lengthMarker: {
|
|
||||||
type: 'boolean',
|
|
||||||
description: 'Use length marker (#) for arrays',
|
|
||||||
default: false,
|
|
||||||
},
|
|
||||||
strict: {
|
strict: {
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
description: 'Enable strict mode for decoding',
|
description: 'Enable strict mode for decoding',
|
||||||
@@ -187,10 +177,9 @@ export const mainCommand: CommandDef<{
|
|||||||
output: outputPath,
|
output: outputPath,
|
||||||
delimiter: delimiter as Delimiter,
|
delimiter: delimiter as Delimiter,
|
||||||
indent,
|
indent,
|
||||||
lengthMarker: args.lengthMarker === true ? '#' : false,
|
|
||||||
printStats: args.stats === true,
|
|
||||||
keyFolding: keyFolding as NonNullable<EncodeOptions['keyFolding']>,
|
keyFolding: keyFolding as NonNullable<EncodeOptions['keyFolding']>,
|
||||||
flattenDepth,
|
flattenDepth,
|
||||||
|
printStats: args.stats === true,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|||||||
@@ -44,7 +44,6 @@ describe('toon CLI', () => {
|
|||||||
const expected = encode(data, {
|
const expected = encode(data, {
|
||||||
delimiter: DEFAULT_DELIMITER,
|
delimiter: DEFAULT_DELIMITER,
|
||||||
indent: 2,
|
indent: 2,
|
||||||
lengthMarker: false,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
expect(output).toBe(expected)
|
expect(output).toBe(expected)
|
||||||
|
|||||||
@@ -38,6 +38,6 @@
|
|||||||
"test": "vitest"
|
"test": "vitest"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@toon-format/spec": "^1.5.2"
|
"@toon-format/spec": "^2.0.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ export const COMMA = ','
|
|||||||
export const COLON = ':'
|
export const COLON = ':'
|
||||||
export const SPACE = ' '
|
export const SPACE = ' '
|
||||||
export const PIPE = '|'
|
export const PIPE = '|'
|
||||||
export const HASH = '#'
|
|
||||||
export const DOT = '.'
|
export const DOT = '.'
|
||||||
|
|
||||||
// #endregion
|
// #endregion
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import type { ArrayHeaderInfo, Delimiter, JsonPrimitive } from '../types'
|
import type { ArrayHeaderInfo, Delimiter, JsonPrimitive } from '../types'
|
||||||
import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, HASH, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
|
import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
|
||||||
import { isBooleanOrNullLiteral, isNumericLiteral } from '../shared/literal-utils'
|
import { isBooleanOrNullLiteral, isNumericLiteral } from '../shared/literal-utils'
|
||||||
import { findClosingQuote, findUnquotedChar, unescapeString } from '../shared/string-utils'
|
import { findClosingQuote, findUnquotedChar, unescapeString } from '../shared/string-utils'
|
||||||
|
|
||||||
@@ -84,7 +84,7 @@ export function parseArrayHeaderLine(
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
const { length, delimiter, hasLengthMarker } = parsedBracket
|
const { length, delimiter } = parsedBracket
|
||||||
|
|
||||||
// Check for fields segment
|
// Check for fields segment
|
||||||
let fields: string[] | undefined
|
let fields: string[] | undefined
|
||||||
@@ -102,7 +102,6 @@ export function parseArrayHeaderLine(
|
|||||||
length,
|
length,
|
||||||
delimiter,
|
delimiter,
|
||||||
fields,
|
fields,
|
||||||
hasLengthMarker,
|
|
||||||
},
|
},
|
||||||
inlineValues: afterColon || undefined,
|
inlineValues: afterColon || undefined,
|
||||||
}
|
}
|
||||||
@@ -111,16 +110,9 @@ export function parseArrayHeaderLine(
|
|||||||
export function parseBracketSegment(
|
export function parseBracketSegment(
|
||||||
seg: string,
|
seg: string,
|
||||||
defaultDelimiter: Delimiter,
|
defaultDelimiter: Delimiter,
|
||||||
): { length: number, delimiter: Delimiter, hasLengthMarker: boolean } {
|
): { length: number, delimiter: Delimiter } {
|
||||||
let hasLengthMarker = false
|
|
||||||
let content = seg
|
let content = seg
|
||||||
|
|
||||||
// Check for length marker
|
|
||||||
if (content.startsWith(HASH)) {
|
|
||||||
hasLengthMarker = true
|
|
||||||
content = content.slice(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for delimiter suffix
|
// Check for delimiter suffix
|
||||||
let delimiter = defaultDelimiter
|
let delimiter = defaultDelimiter
|
||||||
if (content.endsWith(TAB)) {
|
if (content.endsWith(TAB)) {
|
||||||
@@ -137,7 +129,7 @@ export function parseBracketSegment(
|
|||||||
throw new TypeError(`Invalid array length: ${seg}`)
|
throw new TypeError(`Invalid array length: ${seg}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
return { length, delimiter, hasLengthMarker }
|
return { length, delimiter }
|
||||||
}
|
}
|
||||||
|
|
||||||
// #endregion
|
// #endregion
|
||||||
|
|||||||
@@ -113,15 +113,15 @@ export function encodeArray(
|
|||||||
options: ResolvedEncodeOptions,
|
options: ResolvedEncodeOptions,
|
||||||
): void {
|
): void {
|
||||||
if (value.length === 0) {
|
if (value.length === 0) {
|
||||||
const header = formatHeader(0, { key, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
|
const header = formatHeader(0, { key, delimiter: options.delimiter })
|
||||||
writer.push(depth, header)
|
writer.push(depth, header)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Primitive array
|
// Primitive array
|
||||||
if (isArrayOfPrimitives(value)) {
|
if (isArrayOfPrimitives(value)) {
|
||||||
const formatted = encodeInlineArrayLine(value, options.delimiter, key, options.lengthMarker)
|
const arrayLine = encodeInlineArrayLine(value, options.delimiter, key)
|
||||||
writer.push(depth, formatted)
|
writer.push(depth, arrayLine)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -161,19 +161,19 @@ export function encodeArrayOfArraysAsListItems(
|
|||||||
depth: Depth,
|
depth: Depth,
|
||||||
options: ResolvedEncodeOptions,
|
options: ResolvedEncodeOptions,
|
||||||
): void {
|
): void {
|
||||||
const header = formatHeader(values.length, { key: prefix, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
|
const header = formatHeader(values.length, { key: prefix, delimiter: options.delimiter })
|
||||||
writer.push(depth, header)
|
writer.push(depth, header)
|
||||||
|
|
||||||
for (const arr of values) {
|
for (const arr of values) {
|
||||||
if (isArrayOfPrimitives(arr)) {
|
if (isArrayOfPrimitives(arr)) {
|
||||||
const inline = encodeInlineArrayLine(arr, options.delimiter, undefined, options.lengthMarker)
|
const arrayLine = encodeInlineArrayLine(arr, options.delimiter)
|
||||||
writer.pushListItem(depth + 1, inline)
|
writer.pushListItem(depth + 1, arrayLine)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimiter: string, prefix?: string, lengthMarker?: '#' | false): string {
|
export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string {
|
||||||
const header = formatHeader(values.length, { key: prefix, delimiter, lengthMarker })
|
const header = formatHeader(values.length, { key: prefix, delimiter })
|
||||||
const joinedValue = encodeAndJoinPrimitives(values, delimiter)
|
const joinedValue = encodeAndJoinPrimitives(values, delimiter)
|
||||||
// Only add space if there are values
|
// Only add space if there are values
|
||||||
if (values.length === 0) {
|
if (values.length === 0) {
|
||||||
@@ -194,7 +194,7 @@ export function encodeArrayOfObjectsAsTabular(
|
|||||||
depth: Depth,
|
depth: Depth,
|
||||||
options: ResolvedEncodeOptions,
|
options: ResolvedEncodeOptions,
|
||||||
): void {
|
): void {
|
||||||
const formattedHeader = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
|
const formattedHeader = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter })
|
||||||
writer.push(depth, `${formattedHeader}`)
|
writer.push(depth, `${formattedHeader}`)
|
||||||
|
|
||||||
writeTabularRows(rows, header, writer, depth + 1, options)
|
writeTabularRows(rows, header, writer, depth + 1, options)
|
||||||
@@ -265,7 +265,7 @@ export function encodeMixedArrayAsListItems(
|
|||||||
depth: Depth,
|
depth: Depth,
|
||||||
options: ResolvedEncodeOptions,
|
options: ResolvedEncodeOptions,
|
||||||
): void {
|
): void {
|
||||||
const header = formatHeader(items.length, { key: prefix, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
|
const header = formatHeader(items.length, { key: prefix, delimiter: options.delimiter })
|
||||||
writer.push(depth, header)
|
writer.push(depth, header)
|
||||||
|
|
||||||
for (const item of items) {
|
for (const item of items) {
|
||||||
@@ -289,15 +289,15 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept
|
|||||||
else if (isJsonArray(firstValue)) {
|
else if (isJsonArray(firstValue)) {
|
||||||
if (isArrayOfPrimitives(firstValue)) {
|
if (isArrayOfPrimitives(firstValue)) {
|
||||||
// Inline format for primitive arrays
|
// Inline format for primitive arrays
|
||||||
const formatted = encodeInlineArrayLine(firstValue, options.delimiter, firstKey, options.lengthMarker)
|
const arrayPropertyLine = encodeInlineArrayLine(firstValue, options.delimiter, firstKey)
|
||||||
writer.pushListItem(depth, formatted)
|
writer.pushListItem(depth, arrayPropertyLine)
|
||||||
}
|
}
|
||||||
else if (isArrayOfObjects(firstValue)) {
|
else if (isArrayOfObjects(firstValue)) {
|
||||||
// Check if array of objects can use tabular format
|
// Check if array of objects can use tabular format
|
||||||
const header = extractTabularHeader(firstValue)
|
const header = extractTabularHeader(firstValue)
|
||||||
if (header) {
|
if (header) {
|
||||||
// Tabular format for uniform arrays of objects
|
// Tabular format for uniform arrays of objects
|
||||||
const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
|
const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter })
|
||||||
writer.pushListItem(depth, formattedHeader)
|
writer.pushListItem(depth, formattedHeader)
|
||||||
writeTabularRows(firstValue, header, writer, depth + 1, options)
|
writeTabularRows(firstValue, header, writer, depth + 1, options)
|
||||||
}
|
}
|
||||||
@@ -347,8 +347,8 @@ function encodeListItemValue(
|
|||||||
writer.pushListItem(depth, encodePrimitive(value, options.delimiter))
|
writer.pushListItem(depth, encodePrimitive(value, options.delimiter))
|
||||||
}
|
}
|
||||||
else if (isJsonArray(value) && isArrayOfPrimitives(value)) {
|
else if (isJsonArray(value) && isArrayOfPrimitives(value)) {
|
||||||
const inline = encodeInlineArrayLine(value, options.delimiter, undefined, options.lengthMarker)
|
const arrayLine = encodeInlineArrayLine(value, options.delimiter)
|
||||||
writer.pushListItem(depth, inline)
|
writer.pushListItem(depth, arrayLine)
|
||||||
}
|
}
|
||||||
else if (isJsonObject(value)) {
|
else if (isJsonObject(value)) {
|
||||||
encodeObjectAsListItem(value, writer, depth, options)
|
encodeObjectAsListItem(value, writer, depth, options)
|
||||||
|
|||||||
@@ -59,13 +59,11 @@ export function formatHeader(
|
|||||||
key?: string
|
key?: string
|
||||||
fields?: readonly string[]
|
fields?: readonly string[]
|
||||||
delimiter?: string
|
delimiter?: string
|
||||||
lengthMarker?: '#' | false
|
|
||||||
},
|
},
|
||||||
): string {
|
): string {
|
||||||
const key = options?.key
|
const key = options?.key
|
||||||
const fields = options?.fields
|
const fields = options?.fields
|
||||||
const delimiter = options?.delimiter ?? COMMA
|
const delimiter = options?.delimiter ?? COMMA
|
||||||
const lengthMarker = options?.lengthMarker ?? false
|
|
||||||
|
|
||||||
let header = ''
|
let header = ''
|
||||||
|
|
||||||
@@ -74,7 +72,7 @@ export function formatHeader(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Only include delimiter if it's not the default (comma)
|
// Only include delimiter if it's not the default (comma)
|
||||||
header += `[${lengthMarker || ''}${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]`
|
header += `[${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]`
|
||||||
|
|
||||||
if (fields) {
|
if (fields) {
|
||||||
const quotedFields = fields.map(f => encodeKey(f))
|
const quotedFields = fields.map(f => encodeKey(f))
|
||||||
|
|||||||
@@ -88,7 +88,6 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
|||||||
return {
|
return {
|
||||||
indent: options?.indent ?? 2,
|
indent: options?.indent ?? 2,
|
||||||
delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
|
delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
|
||||||
lengthMarker: options?.lengthMarker ?? false,
|
|
||||||
keyFolding: options?.keyFolding ?? 'off',
|
keyFolding: options?.keyFolding ?? 'off',
|
||||||
flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY,
|
flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,12 +24,6 @@ export interface EncodeOptions {
|
|||||||
* @default DELIMITERS.comma
|
* @default DELIMITERS.comma
|
||||||
*/
|
*/
|
||||||
delimiter?: Delimiter
|
delimiter?: Delimiter
|
||||||
/**
|
|
||||||
* Optional marker to prefix array lengths in headers.
|
|
||||||
* When set to `#`, arrays render as [#N] instead of [N].
|
|
||||||
* @default false
|
|
||||||
*/
|
|
||||||
lengthMarker?: '#' | false
|
|
||||||
/**
|
/**
|
||||||
* Enable key folding to collapse single-key wrapper chains.
|
* Enable key folding to collapse single-key wrapper chains.
|
||||||
* When set to 'safe', nested objects with single keys are collapsed into dotted paths
|
* When set to 'safe', nested objects with single keys are collapsed into dotted paths
|
||||||
@@ -84,7 +78,6 @@ export interface ArrayHeaderInfo {
|
|||||||
length: number
|
length: number
|
||||||
delimiter: Delimiter
|
delimiter: Delimiter
|
||||||
fields?: string[]
|
fields?: string[]
|
||||||
hasLengthMarker: boolean
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ParsedLine {
|
export interface ParsedLine {
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import arraysTabular from '@toon-format/spec/tests/fixtures/encode/arrays-tabula
|
|||||||
import delimiters from '@toon-format/spec/tests/fixtures/encode/delimiters.json'
|
import delimiters from '@toon-format/spec/tests/fixtures/encode/delimiters.json'
|
||||||
import keyFolding from '@toon-format/spec/tests/fixtures/encode/key-folding.json'
|
import keyFolding from '@toon-format/spec/tests/fixtures/encode/key-folding.json'
|
||||||
import objects from '@toon-format/spec/tests/fixtures/encode/objects.json'
|
import objects from '@toon-format/spec/tests/fixtures/encode/objects.json'
|
||||||
import options from '@toon-format/spec/tests/fixtures/encode/options.json'
|
|
||||||
import primitives from '@toon-format/spec/tests/fixtures/encode/primitives.json'
|
import primitives from '@toon-format/spec/tests/fixtures/encode/primitives.json'
|
||||||
import whitespace from '@toon-format/spec/tests/fixtures/encode/whitespace.json'
|
import whitespace from '@toon-format/spec/tests/fixtures/encode/whitespace.json'
|
||||||
import { describe, expect, it } from 'vitest'
|
import { describe, expect, it } from 'vitest'
|
||||||
@@ -23,7 +22,6 @@ const fixtureFiles = [
|
|||||||
keyFolding,
|
keyFolding,
|
||||||
delimiters,
|
delimiters,
|
||||||
whitespace,
|
whitespace,
|
||||||
options,
|
|
||||||
] as Fixtures[]
|
] as Fixtures[]
|
||||||
|
|
||||||
for (const fixtures of fixtureFiles) {
|
for (const fixtures of fixtureFiles) {
|
||||||
@@ -49,7 +47,6 @@ function resolveEncodeOptions(options?: TestCase['options']): ResolvedEncodeOpti
|
|||||||
return {
|
return {
|
||||||
indent: options?.indent ?? 2,
|
indent: options?.indent ?? 2,
|
||||||
delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
|
delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
|
||||||
lengthMarker: options?.lengthMarker === '#' ? '#' : false,
|
|
||||||
keyFolding: options?.keyFolding ?? 'off',
|
keyFolding: options?.keyFolding ?? 'off',
|
||||||
flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY,
|
flattenDepth: options?.flattenDepth ?? Number.POSITIVE_INFINITY,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ export interface TestCase {
|
|||||||
options?: {
|
options?: {
|
||||||
delimiter?: ',' | '\t' | '|'
|
delimiter?: ',' | '\t' | '|'
|
||||||
indent?: number
|
indent?: number
|
||||||
lengthMarker?: '#'
|
|
||||||
strict?: boolean
|
strict?: boolean
|
||||||
keyFolding?: 'off' | 'safe'
|
keyFolding?: 'off' | 'safe'
|
||||||
flattenDepth?: number
|
flattenDepth?: number
|
||||||
|
|||||||
10
pnpm-lock.yaml
generated
10
pnpm-lock.yaml
generated
@@ -102,8 +102,8 @@ importers:
|
|||||||
packages/toon:
|
packages/toon:
|
||||||
devDependencies:
|
devDependencies:
|
||||||
'@toon-format/spec':
|
'@toon-format/spec':
|
||||||
specifier: ^1.5.2
|
specifier: ^2.0.0
|
||||||
version: 1.5.2
|
version: 2.0.0
|
||||||
|
|
||||||
packages:
|
packages:
|
||||||
|
|
||||||
@@ -833,8 +833,8 @@ packages:
|
|||||||
peerDependencies:
|
peerDependencies:
|
||||||
eslint: '>=9.0.0'
|
eslint: '>=9.0.0'
|
||||||
|
|
||||||
'@toon-format/spec@1.5.2':
|
'@toon-format/spec@2.0.0':
|
||||||
resolution: {integrity: sha512-PNEIbKQeW5dp/Q+v2wxDlLmxYz3zeIg4qBXUpx9DFGL98yMjUxQSSwpXTITyPgRxCynpksuOJZexTFVdAUugeQ==}
|
resolution: {integrity: sha512-rNM1N74QcbUnI7quWL3RRhAPl/dFzw9RhPxGf8l2ICRvoY76w8wAp2ydE3mq8re1+cy4HAWr5wCRcNrlT8zzRQ==}
|
||||||
|
|
||||||
'@tybys/wasm-util@0.10.1':
|
'@tybys/wasm-util@0.10.1':
|
||||||
resolution: {integrity: sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==}
|
resolution: {integrity: sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==}
|
||||||
@@ -3042,7 +3042,7 @@ snapshots:
|
|||||||
estraverse: 5.3.0
|
estraverse: 5.3.0
|
||||||
picomatch: 4.0.3
|
picomatch: 4.0.3
|
||||||
|
|
||||||
'@toon-format/spec@1.5.2': {}
|
'@toon-format/spec@2.0.0': {}
|
||||||
|
|
||||||
'@tybys/wasm-util@0.10.1':
|
'@tybys/wasm-util@0.10.1':
|
||||||
dependencies:
|
dependencies:
|
||||||
|
|||||||
Reference in New Issue
Block a user