diff --git a/README.md b/README.md index a485366..44494a3 100644 --- a/README.md +++ b/README.md @@ -388,6 +388,9 @@ encode({ config: {} }) // config: TOON quotes strings **only when necessary** to maximize token efficiency. Inner spaces are allowed; leading or trailing spaces force quotes. Unicode and emoji are safe unquoted. +> [!NOTE] +> When using alternative delimiters (tab or pipe), the quoting rules adapt automatically. Strings containing the active delimiter are quoted, while commas and pipes that are not the active delimiter stay unquoted. Tabs are control characters and are always quoted and escaped, whichever delimiter is active. + #### Keys Keys are quoted when any of the following is true: @@ -411,12 +414,35 @@ String values are quoted when any of the following is true: | Condition | Examples | |---|---| | Empty string | `""` | -| Contains comma, colon, quote, backslash, or control chars | `"a,b"`, `"a:b"`, `"say \"hi\""`, `"C:\\Users"`, `"line1\\nline2"` | +| Contains active delimiter, colon, quote, backslash, or control chars | `"a,b"` (comma), `"a\tb"` (tab), `"a\|b"` (pipe), `"a:b"`, `"say \"hi\""`, `"C:\\Users"`, `"line1\\nline2"` | | Leading or trailing spaces | `" padded "`, `" "` | | Looks like boolean/number/null | `"true"`, `"false"`, `"null"`, `"42"`, `"-3.14"`, `"1e-6"`, `"05"` | | Starts with `"- "` (list-like) | `"- item"` | | Looks like structural token | `"[5]"`, `"{key}"`, `"[3]: x,y"` | +**Delimiter-specific behavior:** + +The quoting rules are context-sensitive based on the active delimiter. 
A comma or pipe only needs quoting when it is the active delimiter; a tab is a control character, so it is always quoted and escaped: + +```ts +// With comma delimiter (default): commas need quotes; tabs are always quoted +encode({ items: ['a,b', 'c\td'] }) +// → items[2]: "a,b","c\td" + +// With tab delimiter: tabs need quotes, commas don't +encode({ items: ['a,b', 'c\td'] }, { delimiter: '\t' }) +// → items[2]: a,b "c\td" + +// With pipe delimiter: pipes need quotes, commas don't +encode({ items: ['a|b', 'c,d'] }, { delimiter: '|' }) +// → items[2]: "a|b"|c,d + +// Object values follow the same context-sensitive quoting +encode({ note: 'a,b' }) // → note: "a,b" +encode({ note: 'a,b' }, { delimiter: '|' }) // → note: a,b +encode({ note: 'a,b' }, { delimiter: '\t' }) // → note: a,b +``` + #### Examples ``` @@ -468,13 +494,16 @@ Number normalization examples: ## API -### `encode(value: unknown): string` +### `encode(value: unknown, options?: EncodeOptions): string` Converts any JSON-serializable value to TOON format. **Parameters:** - `value` – Any JSON-serializable value (object, array, primitive, or nested structure). Non-JSON-serializable values (functions, symbols, undefined, non-finite numbers) are converted to `null`. Dates are converted to ISO strings, and BigInts are emitted as decimal integers (no quotes). +- `options` – Optional encoding options: + - `indent?: number` – Number of spaces per indentation level (default: `2`) + - `delimiter?: ',' | '\t' | '|'` – Delimiter for array values and tabular rows (default: `','`) **Returns:** @@ -501,6 +530,76 @@ items[2]{sku,qty,price}: B2,1,14.5 ``` +#### Delimiter Options + +The `delimiter` option allows you to choose between comma (default), tab, or pipe delimiters for array values and tabular rows. Alternative delimiters can provide additional token savings in specific contexts. 
+ +##### Tab Delimiter (`\t`) + +Using tab delimiters instead of commas can reduce token count further, especially for tabular data: + +```ts +import { encode } from 'toon' + +const data = { + items: [ + { sku: 'A1', name: 'Widget', qty: 2, price: 9.99 }, + { sku: 'B2', name: 'Gadget', qty: 1, price: 14.5 } + ] +} + +console.log(encode(data, { delimiter: '\t' })) +``` + +**Output:** + +``` +items[2]{sku,name,qty,price}: + A1 Widget 2 9.99 + B2 Gadget 1 14.5 +``` + +**Benefits:** + +- Tabs are single characters and often tokenize more efficiently than commas +- Tabs rarely appear in natural text, reducing the need for quote-escaping + +**Considerations:** + +- Some terminals and editors may collapse or expand tabs visually +- String values containing tabs will still require quoting + +##### Pipe Delimiter (`|`) + +Pipe delimiters offer a middle ground between commas and tabs: + +```ts +console.log(encode(data, { delimiter: '|' })) +``` + +**Output:** + +``` +items[2]{sku,name,qty,price}: + A1|Widget|2|9.99 + B2|Gadget|1|14.5 +``` + +##### Delimiter Selection Guide + +| Delimiter | Token Efficiency | Human Readability | Quote Escaping | Best For | +|-----------|-----------------|-------------------|----------------|----------| +| `,` (comma) | ⭐⭐⭐ Baseline | ⭐⭐⭐⭐⭐ High | ⭐⭐⭐ Moderate | **Default choice**, general use | +| `\t` (tab) | ⭐⭐⭐⭐⭐ Highest | ⭐⭐ Lower | ⭐⭐⭐⭐⭐ Minimal | Large datasets, maximum efficiency | +| `\|` (pipe) | ⭐⭐⭐⭐ High | ⭐⭐⭐⭐ Good | ⭐⭐⭐⭐ Low | CSV-like data, CLI output | + +> [!TIP] +> When using non-default delimiters, inform the LLM of the delimiter in your prompt: +> ``` +> The following data uses tab-delimited TOON format: +> ```[tab-delimited content]``` +> ``` + ## Using TOON in LLM Prompts When incorporating TOON into your LLM workflows: diff --git a/src/constants.ts b/src/constants.ts index 96f5449..4682897 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -10,6 +10,7 @@ export const LIST_ITEM_PREFIX = '- ' export const COMMA = ',' 
export const COLON = ':' export const SPACE = ' ' +export const PIPE = '|' // #endregion diff --git a/src/encoders.ts b/src/encoders.ts index 2764f2c..d144ed5 100644 --- a/src/encoders.ts +++ b/src/encoders.ts @@ -30,16 +30,16 @@ import { LineWriter } from './writer' export function encodeValue(value: JsonValue, options: ResolvedEncodeOptions): string { if (isJsonPrimitive(value)) { - return encodePrimitive(value) + return encodePrimitive(value, options.delimiter) } const writer = new LineWriter(options.indent) if (isJsonArray(value)) { - encodeRootArray(value, writer) + encodeRootArray(value, writer, options) } else if (isJsonObject(value)) { - encodeObject(value, writer, 0) + encodeObject(value, writer, 0, options) } return writer.toString() @@ -49,22 +49,22 @@ export function encodeValue(value: JsonValue, options: ResolvedEncodeOptions): s // #region Object encoding -export function encodeObject(value: JsonObject, writer: LineWriter, depth: Depth): void { +export function encodeObject(value: JsonObject, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions): void { const keys = Object.keys(value) for (const key of keys) { - encodeKeyValuePair(key, value[key]!, writer, depth) + encodeKeyValuePair(key, value[key]!, writer, depth, options) } } -export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWriter, depth: Depth): void { +export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions): void { const encodedKey = encodeKey(key) if (isJsonPrimitive(value)) { - writer.push(depth, `${encodedKey}: ${encodePrimitive(value)}`) + writer.push(depth, `${encodedKey}: ${encodePrimitive(value, options.delimiter)}`) } else if (isJsonArray(value)) { - encodeArrayProperty(key, value, writer, depth) + encodeArrayProperty(key, value, writer, depth, options) } else if (isJsonObject(value)) { const nestedKeys = Object.keys(value) @@ -74,7 +74,7 @@ export function 
encodeKeyValuePair(key: string, value: JsonValue, writer: LineWr } else { writer.push(depth, `${encodedKey}:`) - encodeObject(value, writer, depth + 1) + encodeObject(value, writer, depth + 1, options) } } } @@ -83,7 +83,7 @@ export function encodeKeyValuePair(key: string, value: JsonValue, writer: LineWr // #region Array encoding -export function encodeRootArray(value: JsonArray, writer: LineWriter): void { +export function encodeRootArray(value: JsonArray, writer: LineWriter, options: ResolvedEncodeOptions): void { if (value.length === 0) { writer.push(0, '[0]:') return @@ -91,7 +91,7 @@ export function encodeRootArray(value: JsonArray, writer: LineWriter): void { // Primitive array if (isArrayOfPrimitives(value)) { - encodeInlinePrimitiveArray(undefined, value, writer, 0) + encodeInlinePrimitiveArray(undefined, value, writer, 0, options) return } @@ -99,7 +99,7 @@ export function encodeRootArray(value: JsonArray, writer: LineWriter): void { if (isArrayOfArrays(value)) { const allPrimitiveArrays = value.every(arr => isArrayOfPrimitives(arr)) if (allPrimitiveArrays) { - encodeArrayOfArraysAsListItems(undefined, value, writer, 0) + encodeArrayOfArraysAsListItems(undefined, value, writer, 0, options) return } } @@ -108,19 +108,19 @@ export function encodeRootArray(value: JsonArray, writer: LineWriter): void { if (isArrayOfObjects(value)) { const header = detectTabularHeader(value) if (header) { - encodeArrayOfObjectsAsTabular(undefined, value, header, writer, 0) + encodeArrayOfObjectsAsTabular(undefined, value, header, writer, 0, options) } else { - encodeArrayOfObjectsAsListItems(undefined, value, writer, 0) + encodeArrayOfObjectsAsListItems(undefined, value, writer, 0, options) } return } // Mixed array: fallback to expanded format (not in spec, but safe default) - encodeMixedArrayAsListItems(undefined, value, writer, 0) + encodeMixedArrayAsListItems(undefined, value, writer, 0, options) } -export function encodeArrayProperty(key: string, value: JsonArray, writer: 
LineWriter, depth: Depth): void { +export function encodeArrayProperty(key: string, value: JsonArray, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions): void { if (value.length === 0) { const encodedKey = encodeKey(key) writer.push(depth, `${encodedKey}[0]:`) @@ -129,7 +129,7 @@ export function encodeArrayProperty(key: string, value: JsonArray, writer: LineW // Primitive array if (isArrayOfPrimitives(value)) { - encodeInlinePrimitiveArray(key, value, writer, depth) + encodeInlinePrimitiveArray(key, value, writer, depth, options) return } @@ -137,7 +137,7 @@ export function encodeArrayProperty(key: string, value: JsonArray, writer: LineW if (isArrayOfArrays(value)) { const allPrimitiveArrays = value.every(arr => isArrayOfPrimitives(arr)) if (allPrimitiveArrays) { - encodeArrayOfArraysAsListItems(key, value, writer, depth) + encodeArrayOfArraysAsListItems(key, value, writer, depth, options) return } } @@ -146,16 +146,16 @@ export function encodeArrayProperty(key: string, value: JsonArray, writer: LineW if (isArrayOfObjects(value)) { const header = detectTabularHeader(value) if (header) { - encodeArrayOfObjectsAsTabular(key, value, header, writer, depth) + encodeArrayOfObjectsAsTabular(key, value, header, writer, depth, options) } else { - encodeArrayOfObjectsAsListItems(key, value, writer, depth) + encodeArrayOfObjectsAsListItems(key, value, writer, depth, options) } return } // Mixed array: fallback to expanded format - encodeMixedArrayAsListItems(key, value, writer, depth) + encodeMixedArrayAsListItems(key, value, writer, depth, options) } // #endregion @@ -167,9 +167,10 @@ export function encodeInlinePrimitiveArray( values: readonly JsonPrimitive[], writer: LineWriter, depth: Depth, + options: ResolvedEncodeOptions, ): void { const header = prefix ? 
formatKeyedArrayHeader(prefix, values.length) : formatArrayHeader(values.length) - const joinedValue = joinEncodedValues(values) + const joinedValue = joinEncodedValues(values, options.delimiter) // Only add space if there are values if (values.length === 0) { writer.push(depth, header) @@ -188,21 +189,22 @@ export function encodeArrayOfArraysAsListItems( values: readonly JsonArray[], writer: LineWriter, depth: Depth, + options: ResolvedEncodeOptions, ): void { const header = prefix ? formatKeyedArrayHeader(prefix, values.length) : formatArrayHeader(values.length) writer.push(depth, header) for (const arr of values) { if (isArrayOfPrimitives(arr)) { - const inline = formatInlineArray(arr) + const inline = formatInlineArray(arr, options.delimiter) writer.push(depth + 1, `${LIST_ITEM_PREFIX}${inline}`) } } } -export function formatInlineArray(values: readonly JsonPrimitive[]): string { +export function formatInlineArray(values: readonly JsonPrimitive[], delimiter: string): string { const header = formatArrayHeader(values.length) - const joinedValue = joinEncodedValues(values) + const joinedValue = joinEncodedValues(values, delimiter) // Only add space if there are values if (values.length === 0) { return header @@ -220,6 +222,7 @@ export function encodeArrayOfObjectsAsTabular( header: readonly string[], writer: LineWriter, depth: Depth, + options: ResolvedEncodeOptions, ): void { const headerStr = prefix ? 
formatKeyedTableHeader(prefix, rows.length, header) @@ -228,7 +231,7 @@ export function encodeArrayOfObjectsAsTabular( for (const row of rows) { const values = header.map(key => row[key]) - const joinedValue = joinEncodedValues(values as JsonPrimitive[]) + const joinedValue = joinEncodedValues(values as JsonPrimitive[], options.delimiter) writer.push(depth + 1, joinedValue) } } @@ -282,6 +285,7 @@ export function encodeMixedArrayAsListItems( items: readonly JsonValue[], writer: LineWriter, depth: Depth, + options: ResolvedEncodeOptions, ): void { const header = prefix ? formatKeyedArrayHeader(prefix, items.length) : formatArrayHeader(items.length) writer.push(depth, header) @@ -289,18 +293,18 @@ export function encodeMixedArrayAsListItems( for (const item of items) { if (isJsonPrimitive(item)) { // Direct primitive as list item - writer.push(depth + 1, `${LIST_ITEM_PREFIX}${encodePrimitive(item)}`) + writer.push(depth + 1, `${LIST_ITEM_PREFIX}${encodePrimitive(item, options.delimiter)}`) } else if (isJsonArray(item)) { // Direct array as list item if (isArrayOfPrimitives(item)) { - const inline = formatInlineArray(item) + const inline = formatInlineArray(item, options.delimiter) writer.push(depth + 1, `${LIST_ITEM_PREFIX}${inline}`) } } else if (isJsonObject(item)) { // Object as list item - encodeObjectAsListItem(item, writer, depth + 1) + encodeObjectAsListItem(item, writer, depth + 1, options) } } } @@ -310,16 +314,17 @@ export function encodeArrayOfObjectsAsListItems( rows: readonly JsonObject[], writer: LineWriter, depth: Depth, + options: ResolvedEncodeOptions, ): void { const header = prefix ? 
formatKeyedArrayHeader(prefix, rows.length) : formatArrayHeader(rows.length) writer.push(depth, `${header}`) for (const obj of rows) { - encodeObjectAsListItem(obj, writer, depth + 1) + encodeObjectAsListItem(obj, writer, depth + 1, options) } } -export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, depth: Depth): void { +export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, depth: Depth, options: ResolvedEncodeOptions): void { const keys = Object.keys(obj) if (keys.length === 0) { writer.push(depth, LIST_ITEM_MARKER) @@ -332,7 +337,7 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept const firstValue = obj[firstKey]! if (isJsonPrimitive(firstValue)) { - writer.push(depth, `${LIST_ITEM_PREFIX}${encodedKey}: ${encodePrimitive(firstValue)}`) + writer.push(depth, `${LIST_ITEM_PREFIX}${encodedKey}: ${encodePrimitive(firstValue, options.delimiter)}`) } else if (isJsonArray(firstValue)) { // For arrays, we need to put them on separate lines @@ -346,14 +351,14 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept } else { writer.push(depth, `${LIST_ITEM_PREFIX}${encodedKey}:`) - encodeObject(firstValue, writer, depth + 2) + encodeObject(firstValue, writer, depth + 2, options) } } // Remaining keys on indented lines for (let i = 1; i < keys.length; i++) { const key = keys[i]! - encodeKeyValuePair(key, obj[key]!, writer, depth + 1) + encodeKeyValuePair(key, obj[key]!, writer, depth + 1, options) } } diff --git a/src/index.ts b/src/index.ts index 5c5a4c1..09fac44 100644 --- a/src/index.ts +++ b/src/index.ts @@ -23,5 +23,6 @@ export function encode(input: unknown, options?: EncodeOptions): string { function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions { return { indent: options?.indent ?? 2, + delimiter: options?.delimiter ?? 
',', } } diff --git a/src/primitives.ts b/src/primitives.ts index 66fe1ca..79ee5d7 100644 --- a/src/primitives.ts +++ b/src/primitives.ts @@ -11,7 +11,7 @@ import { // #region Primitive encoding -export function encodePrimitive(value: JsonPrimitive): string { +export function encodePrimitive(value: JsonPrimitive, delimiter?: string): string { if (value === null) { return NULL_LITERAL } @@ -24,11 +24,11 @@ export function encodePrimitive(value: JsonPrimitive): string { return String(value) } - return encodeStringLiteral(value) + return encodeStringLiteral(value, delimiter) } -export function encodeStringLiteral(value: string): string { - if (isSafeUnquoted(value)) { +export function encodeStringLiteral(value: string, delimiter: string = COMMA): string { + if (isSafeUnquoted(value, delimiter)) { return value } @@ -44,7 +44,7 @@ export function escapeString(value: string): string { .replace(/\t/g, `${BACKSLASH}t`) } -export function isSafeUnquoted(value: string): boolean { +export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean { if (!value) { return false } @@ -61,8 +61,33 @@ export function isSafeUnquoted(value: string): boolean { return false } - // Check for structural characters: comma, colon, brackets, braces, hyphen at start, newline, carriage return, tab, double-quote - if (/[,:\n\r\t"[\]{}]/.test(value) || value.startsWith(LIST_ITEM_MARKER)) { + // Check for colon (always structural) + if (value.includes(':')) { + return false + } + + // Check for quotes and backslash (always need escaping) + if (value.includes('"') || value.includes('\\')) { + return false + } + + // Check for brackets and braces (always structural) + if (/[[\]{}]/.test(value)) { + return false + } + + // Check for control characters (newline, carriage return, tab - always need quoting/escaping) + if (/[\n\r\t]/.test(value)) { + return false + } + + // Check for the active delimiter + if (value.includes(delimiter)) { + return false + } + + // Check for hyphen at 
start (list marker) + if (value.startsWith(LIST_ITEM_MARKER)) { return false } @@ -98,8 +123,8 @@ function isValidUnquotedKey(key: string): boolean { // #region Value joining -export function joinEncodedValues(values: readonly JsonPrimitive[]): string { - return values.map(v => encodePrimitive(v)).join(COMMA) +export function joinEncodedValues(values: readonly JsonPrimitive[], delimiter: string = COMMA): string { + return values.map(v => encodePrimitive(v, delimiter)).join(delimiter) } // #endregion diff --git a/src/types.ts b/src/types.ts index b0ec9ea..2fef3c5 100644 --- a/src/types.ts +++ b/src/types.ts @@ -11,6 +11,11 @@ export type JsonValue = JsonPrimitive | JsonObject | JsonArray export interface EncodeOptions { indent?: number + /** + * Delimiter to use for tabular array rows and inline primitive arrays. + * @default ',' + */ + delimiter?: ',' | '\t' | '|' } export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>> diff --git a/test/index.test.ts b/test/index.test.ts index b434ee3..c0c9560 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -472,3 +472,146 @@ describe('non-JSON-serializable values', () => { expect(encode({ sym: Symbol('test') })).toBe('sym: null') }) }) + +describe('delimiter options', () => { + describe('basic delimiter usage', () => { + it.each([ + { delimiter: '\t' as const, name: 'tab', expected: 'admin\tops\tdev' }, + { delimiter: '|' as const, name: 'pipe', expected: 'admin|ops|dev' }, + { delimiter: ',' as const, name: 'comma', expected: 'admin,ops,dev' }, + ])('encodes primitive arrays with $name delimiter', ({ delimiter, expected }) => { + const obj = { tags: ['admin', 'ops', 'dev'] } + expect(encode(obj, { delimiter })).toBe(`tags[3]: ${expected}`) + }) + + it.each([ + { delimiter: '\t' as const, name: 'tab', expected: 'items[2]{sku,qty,price}:\n A1\t2\t9.99\n B2\t1\t14.5' }, + { delimiter: '|' as const, name: 'pipe', expected: 'items[2]{sku,qty,price}:\n A1|2|9.99\n B2|1|14.5' }, + ])('encodes tabular arrays with $name delimiter', ({ 
delimiter, expected }) => { + const obj = { + items: [ + { sku: 'A1', qty: 2, price: 9.99 }, + { sku: 'B2', qty: 1, price: 14.5 }, + ], + } + expect(encode(obj, { delimiter })).toBe(expected) + }) + + it.each([ + { delimiter: '\t' as const, name: 'tab', expected: 'pairs[2]:\n - [2]: a\tb\n - [2]: c\td' }, + { delimiter: '|' as const, name: 'pipe', expected: 'pairs[2]:\n - [2]: a|b\n - [2]: c|d' }, + ])('encodes nested arrays with $name delimiter', ({ delimiter, expected }) => { + const obj = { pairs: [['a', 'b'], ['c', 'd']] } + expect(encode(obj, { delimiter })).toBe(expected) + }) + + it.each([ + { delimiter: '\t' as const, name: 'tab' }, + { delimiter: '|' as const, name: 'pipe' }, + ])('encodes root arrays with $name delimiter', ({ delimiter }) => { + const arr = ['x', 'y', 'z'] + expect(encode(arr, { delimiter })).toBe(`[3]: x${delimiter}y${delimiter}z`) + }) + + it.each([ + { delimiter: '\t' as const, name: 'tab', expected: '[2]{id}:\n 1\n 2' }, + { delimiter: '|' as const, name: 'pipe', expected: '[2]{id}:\n 1\n 2' }, + ])('encodes root arrays of objects with $name delimiter', ({ delimiter, expected }) => { + const arr = [{ id: 1 }, { id: 2 }] + expect(encode(arr, { delimiter })).toBe(expected) + }) + }) + + describe('delimiter-aware quoting', () => { + it.each([ + { delimiter: '\t' as const, name: 'tab', char: '\t', input: ['a', 'b\tc', 'd'], expected: 'a\t"b\\tc"\td' }, + { delimiter: '|' as const, name: 'pipe', char: '|', input: ['a', 'b|c', 'd'], expected: 'a|"b|c"|d' }, + ])('quotes strings containing the active $name delimiter', ({ delimiter, input, expected }) => { + expect(encode({ items: input }, { delimiter })).toBe(`items[${input.length}]: ${expected}`) + }) + + it.each([ + { delimiter: '\t' as const, name: 'tab', input: ['a,b', 'c,d'], expected: 'a,b\tc,d' }, + { delimiter: '|' as const, name: 'pipe', input: ['a,b', 'c,d'], expected: 'a,b|c,d' }, + ])('does not quote commas when using $name delimiter', ({ delimiter, input, expected }) => { + 
expect(encode({ items: input }, { delimiter })).toBe(`items[${input.length}]: ${expected}`) + }) + + it('quotes values containing the active delimiter in tabular format', () => { + const obj = { + items: [ + { id: 1, note: 'a,b' }, + { id: 2, note: 'c,d' }, + ], + } + expect(encode(obj, { delimiter: ',' })).toBe('items[2]{id,note}:\n 1,"a,b"\n 2,"c,d"') + expect(encode(obj, { delimiter: '\t' })).toBe('items[2]{id,note}:\n 1\ta,b\n 2\tc,d') + }) + + it('does not quote commas in object values when using non-comma delimiter', () => { + expect(encode({ note: 'a,b' }, { delimiter: '|' })).toBe('note: a,b') + expect(encode({ note: 'a,b' }, { delimiter: '\t' })).toBe('note: a,b') + }) + + it('quotes nested array values containing the active delimiter', () => { + expect(encode({ pairs: [['a', 'b|c']] }, { delimiter: '|' })).toBe('pairs[1]:\n - [2]: a|"b|c"') + expect(encode({ pairs: [['a', 'b\tc']] }, { delimiter: '\t' })).toBe('pairs[1]:\n - [2]: a\t"b\\tc"') + }) + }) + + describe('delimiter-independent quoting rules', () => { + it('preserves ambiguity quoting regardless of delimiter', () => { + const obj = { items: ['true', '42', '-3.14'] } + expect(encode(obj, { delimiter: '|' })).toBe('items[3]: "true"|"42"|"-3.14"') + expect(encode(obj, { delimiter: '\t' })).toBe('items[3]: "true"\t"42"\t"-3.14"') + }) + + it('preserves structural quoting regardless of delimiter', () => { + const obj = { items: ['[5]', '{key}', '- item'] } + expect(encode(obj, { delimiter: '|' })).toBe('items[3]: "[5]"|"{key}"|"- item"') + expect(encode(obj, { delimiter: '\t' })).toBe('items[3]: "[5]"\t"{key}"\t"- item"') + }) + + it('quotes keys containing the active delimiter', () => { + expect(encode({ 'a|b': 1 }, { delimiter: '|' })).toBe('"a|b": 1') + expect(encode({ 'a\tb': 1 }, { delimiter: '\t' })).toBe('"a\\tb": 1') + }) + + it('quotes tabular headers containing the active delimiter', () => { + const obj = { items: [{ 'a|b': 1 }, { 'a|b': 2 }] } + expect(encode(obj, { delimiter: '|' 
})).toBe('items[2]{"a|b"}:\n 1\n 2') + }) + + it('always uses commas in tabular headers regardless of delimiter', () => { + const obj = { items: [{ a: 1, b: 2 }, { a: 3, b: 4 }] } + expect(encode(obj, { delimiter: '|' })).toBe('items[2]{a,b}:\n 1|2\n 3|4') + expect(encode(obj, { delimiter: '\t' })).toBe('items[2]{a,b}:\n 1\t2\n 3\t4') + }) + }) + + describe('formatting invariants with delimiters', () => { + it.each([ + { delimiter: '\t' as const, name: 'tab' }, + { delimiter: '|' as const, name: 'pipe' }, + ])('produces no trailing spaces with $name delimiter', ({ delimiter }) => { + const obj = { + user: { id: 123, name: 'Ada' }, + items: ['a', 'b'], + } + const result = encode(obj, { delimiter }) + const lines = result.split('\n') + for (const line of lines) { + expect(line).not.toMatch(/ $/) + } + }) + + it.each([ + { delimiter: '\t' as const, name: 'tab' }, + { delimiter: '|' as const, name: 'pipe' }, + ])('produces no trailing newline with $name delimiter', ({ delimiter }) => { + const obj = { id: 123 } + const result = encode(obj, { delimiter }) + expect(result).not.toMatch(/\n$/) + }) + }) +})