diff --git a/README.md b/README.md index c58e7ba..571f297 100644 --- a/README.md +++ b/README.md @@ -234,7 +234,8 @@ const data = { id: 123, name: 'Ada', tags: ['admin', 'ops'], - active: true + active: true, + preferences: [] } } @@ -249,6 +250,7 @@ user: name: Ada tags[2]: admin,ops active: true + preferences[0]: ``` ## Canonical Formatting Rules @@ -260,7 +262,9 @@ TOON formatting is deterministic and minimal: - `key: value` for primitives (single space after colon). - `key:` for nested/empty objects (no trailing space on that line). - **Arrays**: - - Primitive arrays inline: `key[N]: v1,v2` (no spaces after commas). + - Delimiter encoding: Comma delimiters are implicit in array headers (e.g., `tags[3]:`, `items[2]{id,name}:`). Tab and pipe delimiters are explicitly shown in array headers (e.g., `tags[3|]:`, `items[2 ]{id name}:`). + - Primitive arrays inline: `key[N]: v1,v2` (comma) or `key[N<delim>]: v1<delim>v2` (tab/pipe). + - Tabular arrays: `key[N]{f1,f2}: …` (comma) or `key[N<delim>]{f1<delim>f2}: …` (tab/pipe). - List items: two spaces, hyphen, space (`" - …"`). - **Whitespace invariants**: - No trailing spaces at end of any line. @@ -306,7 +310,7 @@ user: ### Arrays > [!TIP] -> TOON includes the array length in brackets (e.g., `items[3]` or `[2]`). This explicit count helps LLMs track the number of elements, reducing errors when generating or validating structured output. +> TOON includes the array length in brackets (e.g., `items[3]`). When using comma delimiters (default), the delimiter is implicit. When using tab or pipe delimiters, the delimiter is explicitly shown in the header (e.g., `tags[2|]` or `[2 ]`). This encoding helps LLMs identify the delimiter and track the number of elements, reducing errors when generating or validating structured output. 
#### Primitive Arrays (Inline) @@ -454,8 +458,8 @@ String values are quoted when any of the following is true: | Starts with `"- "` (list-like) | `"- item"` | | Looks like structural token | `"[5]"`, `"{key}"`, `"[3]: x,y"` | -> [!NOTE] -> **Delimiter-aware quoting:** The quoting rules are context-sensitive. When using tab or pipe delimiters, commas don't need quoting. Only the active delimiter triggers quoting – this applies to both array values and object values. +> [!IMPORTANT] +> **Delimiter-aware quoting:** Unquoted strings never contain `:` or the active delimiter. This makes TOON reliably parseable with simple heuristics: split key/value on first `: `, and split array values on the delimiter declared in the array header. When using tab or pipe delimiters, commas don't need quoting – only the active delimiter triggers quoting for both array values and object values. #### Examples @@ -478,9 +482,10 @@ For arrays of objects to use the efficient tabular format, all of the following | All elements are objects | No primitives in the array | | Identical key sets | No missing or extra keys across rows | | Primitive values only | No nested arrays or objects | +| Header delimiter | Comma is implicit in headers (`[N]{f1,f2}`); tab and pipe are explicit (`[N ]{f1 f2}`, `[N\|]{f1\|f2}`) | | Header key order | Taken from the first object | -| Header key quoting | Same rules as object keys | -| Row value quoting | Same rules as string values | +| Header key quoting | Same rules as object keys; keys containing the active delimiter must be quoted | +| Row value quoting | Same rules as string values; values containing the active delimiter must be quoted | If any condition fails, TOON falls back to list format. 
@@ -568,7 +573,7 @@ console.log(encode(data, { delimiter: '\t' })) **Output:** ``` -items[2]{sku,name,qty,price}: +items[2 ]{sku name qty price}: A1 Widget 2 9.99 B2 Gadget 1 14.5 ``` @@ -577,6 +582,7 @@ items[2]{sku,name,qty,price}: - Tabs are single characters and often tokenize more efficiently than commas. - Tabs rarely appear in natural text, reducing the need for quote-escaping. +- The delimiter is explicitly encoded in the array header, making it self-descriptive. **Considerations:** @@ -594,7 +600,7 @@ console.log(encode(data, { delimiter: '|' })) **Output:** ``` -items[2]{sku,name,qty,price}: +items[2|]{sku|name|qty|price}: A1|Widget|2|9.99 B2|Gadget|1|14.5 ``` diff --git a/src/encoders.ts b/src/encoders.ts index 9a3de5d..d54f6de 100644 --- a/src/encoders.ts +++ b/src/encoders.ts @@ -88,13 +88,8 @@ export function encodeArray( options: ResolvedEncodeOptions, ): void { if (value.length === 0) { - if (key === undefined) { - writer.push(depth, '[0]:') - } - else { - const encodedKey = encodeKey(key) - writer.push(depth, `${encodedKey}[0]:`) - } + const header = formatHeader(0, key ? { key, delimiter: options.delimiter } : { delimiter: options.delimiter }) + writer.push(depth, header) return } @@ -155,7 +150,7 @@ export function encodeArrayOfArraysAsListItems( depth: Depth, options: ResolvedEncodeOptions, ): void { - const header = formatHeader(values.length, prefix ? { key: prefix } : undefined) + const header = formatHeader(values.length, prefix ? { key: prefix, delimiter: options.delimiter } : { delimiter: options.delimiter }) writer.push(depth, header) for (const arr of values) { @@ -167,7 +162,7 @@ export function encodeArrayOfArraysAsListItems( } export function formatInlineArray(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string { - const header = formatHeader(values.length, prefix ? { key: prefix } : undefined) + const header = formatHeader(values.length, prefix ? 
{ key: prefix, delimiter } : { delimiter }) const joinedValue = joinEncodedValues(values, delimiter) // Only add space if there are values if (values.length === 0) { @@ -188,7 +183,7 @@ export function encodeArrayOfObjectsAsTabular( depth: Depth, options: ResolvedEncodeOptions, ): void { - const headerStr = formatHeader(rows.length, { key: prefix, fields: header }) + const headerStr = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter }) writer.push(depth, `${headerStr}`) writeTabularRows(rows, header, writer, depth + 1, options) @@ -259,7 +254,7 @@ export function encodeMixedArrayAsListItems( depth: Depth, options: ResolvedEncodeOptions, ): void { - const header = formatHeader(items.length, prefix ? { key: prefix } : undefined) + const header = formatHeader(items.length, prefix ? { key: prefix, delimiter: options.delimiter } : { delimiter: options.delimiter }) writer.push(depth, header) for (const item of items) { @@ -307,7 +302,7 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept const header = detectTabularHeader(firstValue) if (header) { // Tabular format for uniform arrays of objects - const headerStr = formatHeader(firstValue.length, { key: firstKey, fields: header }) + const headerStr = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter }) writer.push(depth, `${LIST_ITEM_PREFIX}${headerStr}`) writeTabularRows(firstValue, header, writer, depth + 1, options) } diff --git a/src/primitives.ts b/src/primitives.ts index e61f0dc..1b6ecf4 100644 --- a/src/primitives.ts +++ b/src/primitives.ts @@ -2,6 +2,7 @@ import type { JsonPrimitive } from './types' import { BACKSLASH, COMMA, + DEFAULT_DELIMITER, DOUBLE_QUOTE, FALSE_LITERAL, LIST_ITEM_MARKER, @@ -139,10 +140,12 @@ export function formatHeader( options?: { key?: string fields?: readonly string[] + delimiter?: string }, ): string { const key = options?.key const fields = options?.fields + const delimiter = 
options?.delimiter ?? COMMA let header = '' @@ -150,11 +153,12 @@ export function formatHeader( header += encodeKey(key) } - header += `[${length}]` + // Only include delimiter if it's not the default (comma) + header += `[${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]` if (fields) { const quotedFields = fields.map(f => encodeKey(f)) - header += `{${quotedFields.join(',')}}` + header += `{${quotedFields.join(delimiter)}}` } header += ':' diff --git a/test/index.test.ts b/test/index.test.ts index 6b2b204..7dd02ca 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -597,12 +597,12 @@ describe('delimiter options', () => { { delimiter: ',' as const, name: 'comma', expected: 'admin,ops,dev' }, ])('encodes primitive arrays with $name', ({ delimiter, expected }) => { const obj = { tags: ['admin', 'ops', 'dev'] } - expect(encode(obj, { delimiter })).toBe(`tags[3]: ${expected}`) + expect(encode(obj, { delimiter })).toBe(`tags[3${delimiter !== ',' ? delimiter : ''}]: ${expected}`) }) it.each([ - { delimiter: '\t' as const, name: 'tab', expected: 'items[2]{sku,qty,price}:\n A1\t2\t9.99\n B2\t1\t14.5' }, - { delimiter: '|' as const, name: 'pipe', expected: 'items[2]{sku,qty,price}:\n A1|2|9.99\n B2|1|14.5' }, + { delimiter: '\t' as const, name: 'tab', expected: 'items[2\t]{sku\tqty\tprice}:\n A1\t2\t9.99\n B2\t1\t14.5' }, + { delimiter: '|' as const, name: 'pipe', expected: 'items[2|]{sku|qty|price}:\n A1|2|9.99\n B2|1|14.5' }, ])('encodes tabular arrays with $name', ({ delimiter, expected }) => { const obj = { items: [ @@ -614,8 +614,8 @@ describe('delimiter options', () => { }) it.each([ - { delimiter: '\t' as const, name: 'tab', expected: 'pairs[2]:\n - [2]: a\tb\n - [2]: c\td' }, - { delimiter: '|' as const, name: 'pipe', expected: 'pairs[2]:\n - [2]: a|b\n - [2]: c|d' }, + { delimiter: '\t' as const, name: 'tab', expected: 'pairs[2\t]:\n - [2\t]: a\tb\n - [2\t]: c\td' }, + { delimiter: '|' as const, name: 'pipe', expected: 'pairs[2|]:\n - [2|]: 
a|b\n - [2|]: c|d' }, ])('encodes nested arrays with $name', ({ delimiter, expected }) => { const obj = { pairs: [['a', 'b'], ['c', 'd']] } expect(encode(obj, { delimiter })).toBe(expected) @@ -626,12 +626,12 @@ describe('delimiter options', () => { { delimiter: '|' as const, name: 'pipe' }, ])('encodes root arrays with $name', ({ delimiter }) => { const arr = ['x', 'y', 'z'] - expect(encode(arr, { delimiter })).toBe(`[3]: x${delimiter}y${delimiter}z`) + expect(encode(arr, { delimiter })).toBe(`[3${delimiter}]: x${delimiter}y${delimiter}z`) }) it.each([ - { delimiter: '\t' as const, name: 'tab', expected: '[2]{id}:\n 1\n 2' }, - { delimiter: '|' as const, name: 'pipe', expected: '[2]{id}:\n 1\n 2' }, + { delimiter: '\t' as const, name: 'tab', expected: '[2\t]{id}:\n 1\n 2' }, + { delimiter: '|' as const, name: 'pipe', expected: '[2|]{id}:\n 1\n 2' }, ])('encodes root arrays of objects with $name', ({ delimiter, expected }) => { const arr = [{ id: 1 }, { id: 2 }] expect(encode(arr, { delimiter })).toBe(expected) @@ -643,14 +643,14 @@ describe('delimiter options', () => { { delimiter: '\t' as const, name: 'tab', char: '\t', input: ['a', 'b\tc', 'd'], expected: 'a\t"b\\tc"\td' }, { delimiter: '|' as const, name: 'pipe', char: '|', input: ['a', 'b|c', 'd'], expected: 'a|"b|c"|d' }, ])('quotes strings containing $name', ({ delimiter, input, expected }) => { - expect(encode({ items: input }, { delimiter })).toBe(`items[${input.length}]: ${expected}`) + expect(encode({ items: input }, { delimiter })).toBe(`items[${input.length}${delimiter}]: ${expected}`) }) it.each([ { delimiter: '\t' as const, name: 'tab', input: ['a,b', 'c,d'], expected: 'a,b\tc,d' }, { delimiter: '|' as const, name: 'pipe', input: ['a,b', 'c,d'], expected: 'a,b|c,d' }, ])('does not quote commas with $name', ({ delimiter, input, expected }) => { - expect(encode({ items: input }, { delimiter })).toBe(`items[${input.length}]: ${expected}`) + expect(encode({ items: input }, { delimiter 
})).toBe(`items[${input.length}${delimiter}]: ${expected}`) }) it('quotes tabular values containing the delimiter', () => { @@ -661,7 +661,7 @@ describe('delimiter options', () => { ], } expect(encode(obj, { delimiter: ',' })).toBe('items[2]{id,note}:\n 1,"a,b"\n 2,"c,d"') - expect(encode(obj, { delimiter: '\t' })).toBe('items[2]{id,note}:\n 1\ta,b\n 2\tc,d') + expect(encode(obj, { delimiter: '\t' })).toBe('items[2\t]{id\tnote}:\n 1\ta,b\n 2\tc,d') }) it('does not quote commas in object values with non-comma delimiter', () => { @@ -670,22 +670,22 @@ describe('delimiter options', () => { }) it('quotes nested array values containing the delimiter', () => { - expect(encode({ pairs: [['a', 'b|c']] }, { delimiter: '|' })).toBe('pairs[1]:\n - [2]: a|"b|c"') - expect(encode({ pairs: [['a', 'b\tc']] }, { delimiter: '\t' })).toBe('pairs[1]:\n - [2]: a\t"b\\tc"') + expect(encode({ pairs: [['a', 'b|c']] }, { delimiter: '|' })).toBe('pairs[1|]:\n - [2|]: a|"b|c"') + expect(encode({ pairs: [['a', 'b\tc']] }, { delimiter: '\t' })).toBe('pairs[1\t]:\n - [2\t]: a\t"b\\tc"') }) }) describe('delimiter-independent quoting rules', () => { it('preserves ambiguity quoting regardless of delimiter', () => { const obj = { items: ['true', '42', '-3.14'] } - expect(encode(obj, { delimiter: '|' })).toBe('items[3]: "true"|"42"|"-3.14"') - expect(encode(obj, { delimiter: '\t' })).toBe('items[3]: "true"\t"42"\t"-3.14"') + expect(encode(obj, { delimiter: '|' })).toBe('items[3|]: "true"|"42"|"-3.14"') + expect(encode(obj, { delimiter: '\t' })).toBe('items[3\t]: "true"\t"42"\t"-3.14"') }) it('preserves structural quoting regardless of delimiter', () => { const obj = { items: ['[5]', '{key}', '- item'] } - expect(encode(obj, { delimiter: '|' })).toBe('items[3]: "[5]"|"{key}"|"- item"') - expect(encode(obj, { delimiter: '\t' })).toBe('items[3]: "[5]"\t"{key}"\t"- item"') + expect(encode(obj, { delimiter: '|' })).toBe('items[3|]: "[5]"|"{key}"|"- item"') + expect(encode(obj, { delimiter: '\t' 
})).toBe('items[3\t]: "[5]"\t"{key}"\t"- item"') }) it('quotes keys containing the delimiter', () => { @@ -695,13 +695,13 @@ describe('delimiter options', () => { it('quotes tabular headers containing the delimiter', () => { const obj = { items: [{ 'a|b': 1 }, { 'a|b': 2 }] } - expect(encode(obj, { delimiter: '|' })).toBe('items[2]{"a|b"}:\n 1\n 2') + expect(encode(obj, { delimiter: '|' })).toBe('items[2|]{"a|b"}:\n 1\n 2') }) - it('always uses commas in tabular headers regardless of delimiter', () => { + it('header uses the active delimiter', () => { const obj = { items: [{ a: 1, b: 2 }, { a: 3, b: 4 }] } - expect(encode(obj, { delimiter: '|' })).toBe('items[2]{a,b}:\n 1|2\n 3|4') - expect(encode(obj, { delimiter: '\t' })).toBe('items[2]{a,b}:\n 1\t2\n 3\t4') + expect(encode(obj, { delimiter: '|' })).toBe('items[2|]{a|b}:\n 1|2\n 3|4') + expect(encode(obj, { delimiter: '\t' })).toBe('items[2\t]{a\tb}:\n 1\t2\n 3\t4') }) })