From 45352879e13b042ea2840b3442ccf44ddaea1465 Mon Sep 17 00:00:00 2001 From: Johann Schopplich Date: Sat, 25 Oct 2025 19:38:45 +0200 Subject: [PATCH] feat: `lengthMarker` option --- README.md | 30 ++++++++++++++++++++++++++++++ src/encoders.ts | 24 ++++++++++++------------ src/index.ts | 1 + src/primitives.ts | 4 +++- src/types.ts | 6 ++++++ test/index.test.ts | 36 ++++++++++++++++++++++++++++++++++++ 6 files changed, 88 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 571f297..a93208c 100644 --- a/README.md +++ b/README.md @@ -523,6 +523,7 @@ Converts any JSON-serializable value to TOON format. - `options` – Optional encoding options: - `indent?: number` – Number of spaces per indentation level (default: `2`) - `delimiter?: ',' | '\t' | '|'` – Delimiter for array values and tabular rows (default: `','`) + - `lengthMarker?: '#' | false` – Optional marker to prefix array lengths (default: `false`) **Returns:** @@ -605,6 +606,35 @@ items[2|]{sku|name|qty|price}: B2|Gadget|1|14.5 ``` +#### Length Marker Option + +The `lengthMarker` option adds an optional hash (`#`) prefix to array lengths to emphasize that the bracketed value represents a count, not an index: + +```ts +import { encode } from '@byjohann/toon' + +const data = { + tags: ['admin', 'ops', 'dev'], + items: [ + { sku: 'A1', qty: 2, price: 9.99 }, + { sku: 'B2', qty: 1, price: 14.5 }, + ], +} + +console.log(encode(data, { lengthMarker: '#' })) +// tags[#3]: admin,ops,dev +// items[#2]{sku,qty,price}: +// A1,2,9.99 +// B2,1,14.5 + +// Works with custom delimiters +console.log(encode(data, { lengthMarker: '#', delimiter: '|' })) +// tags[#3|]: admin|ops|dev +// items[#2|]{sku|qty|price}: +// A1|2|9.99 +// B2|1|14.5 +``` + ## Using TOON in LLM Prompts When incorporating TOON into your LLM workflows: diff --git a/src/encoders.ts b/src/encoders.ts index d54f6de..53d81b5 100644 --- a/src/encoders.ts +++ b/src/encoders.ts @@ -88,7 +88,7 @@ export function encodeArray( options: 
ResolvedEncodeOptions, ): void { if (value.length === 0) { - const header = formatHeader(0, key ? { key, delimiter: options.delimiter } : { delimiter: options.delimiter }) + const header = formatHeader(0, { key, delimiter: options.delimiter, lengthMarker: options.lengthMarker }) writer.push(depth, header) return } @@ -135,7 +135,7 @@ export function encodeInlinePrimitiveArray( depth: Depth, options: ResolvedEncodeOptions, ): void { - const formatted = formatInlineArray(values, options.delimiter, prefix) + const formatted = formatInlineArray(values, options.delimiter, prefix, options.lengthMarker) writer.push(depth, formatted) } @@ -150,19 +150,19 @@ export function encodeArrayOfArraysAsListItems( depth: Depth, options: ResolvedEncodeOptions, ): void { - const header = formatHeader(values.length, prefix ? { key: prefix, delimiter: options.delimiter } : { delimiter: options.delimiter }) + const header = formatHeader(values.length, { key: prefix, delimiter: options.delimiter, lengthMarker: options.lengthMarker }) writer.push(depth, header) for (const arr of values) { if (isArrayOfPrimitives(arr)) { - const inline = formatInlineArray(arr, options.delimiter) + const inline = formatInlineArray(arr, options.delimiter, undefined, options.lengthMarker) writer.push(depth + 1, `${LIST_ITEM_PREFIX}${inline}`) } } } -export function formatInlineArray(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string { - const header = formatHeader(values.length, prefix ? 
{ key: prefix, delimiter } : { delimiter }) +export function formatInlineArray(values: readonly JsonPrimitive[], delimiter: string, prefix?: string, lengthMarker?: '#' | false): string { + const header = formatHeader(values.length, { key: prefix, delimiter, lengthMarker }) const joinedValue = joinEncodedValues(values, delimiter) // Only add space if there are values if (values.length === 0) { @@ -183,7 +183,7 @@ export function encodeArrayOfObjectsAsTabular( depth: Depth, options: ResolvedEncodeOptions, ): void { - const headerStr = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter }) + const headerStr = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter, lengthMarker: options.lengthMarker }) writer.push(depth, `${headerStr}`) writeTabularRows(rows, header, writer, depth + 1, options) @@ -254,7 +254,7 @@ export function encodeMixedArrayAsListItems( depth: Depth, options: ResolvedEncodeOptions, ): void { - const header = formatHeader(items.length, prefix ? 
{ key: prefix, delimiter: options.delimiter } : { delimiter: options.delimiter }) + const header = formatHeader(items.length, { key: prefix, delimiter: options.delimiter, lengthMarker: options.lengthMarker }) writer.push(depth, header) for (const item of items) { @@ -265,7 +265,7 @@ export function encodeMixedArrayAsListItems( else if (isJsonArray(item)) { // Direct array as list item if (isArrayOfPrimitives(item)) { - const inline = formatInlineArray(item, options.delimiter) + const inline = formatInlineArray(item, options.delimiter, undefined, options.lengthMarker) writer.push(depth + 1, `${LIST_ITEM_PREFIX}${inline}`) } } @@ -294,7 +294,7 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept else if (isJsonArray(firstValue)) { if (isArrayOfPrimitives(firstValue)) { // Inline format for primitive arrays - const formatted = formatInlineArray(firstValue, options.delimiter, firstKey) + const formatted = formatInlineArray(firstValue, options.delimiter, firstKey, options.lengthMarker) writer.push(depth, `${LIST_ITEM_PREFIX}${formatted}`) } else if (isArrayOfObjects(firstValue)) { @@ -302,7 +302,7 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept const header = detectTabularHeader(firstValue) if (header) { // Tabular format for uniform arrays of objects - const headerStr = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter }) + const headerStr = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter, lengthMarker: options.lengthMarker }) writer.push(depth, `${LIST_ITEM_PREFIX}${headerStr}`) writeTabularRows(firstValue, header, writer, depth + 1, options) } @@ -324,7 +324,7 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept writer.push(depth + 1, `${LIST_ITEM_PREFIX}${encodePrimitive(item, options.delimiter)}`) } else if (isJsonArray(item) && isArrayOfPrimitives(item)) { - const inline = 
formatInlineArray(item, options.delimiter) + const inline = formatInlineArray(item, options.delimiter, undefined, options.lengthMarker) writer.push(depth + 1, `${LIST_ITEM_PREFIX}${inline}`) } else if (isJsonObject(item)) { diff --git a/src/index.ts b/src/index.ts index 43c936c..18697cd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -28,5 +28,6 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions { return { indent: options?.indent ?? 2, delimiter: options?.delimiter ?? DEFAULT_DELIMITER, + lengthMarker: options?.lengthMarker ?? false, } } diff --git a/src/primitives.ts b/src/primitives.ts index 1b6ecf4..b7e826f 100644 --- a/src/primitives.ts +++ b/src/primitives.ts @@ -141,11 +141,13 @@ export function formatHeader( key?: string fields?: readonly string[] delimiter?: string + lengthMarker?: '#' | false }, ): string { const key = options?.key const fields = options?.fields const delimiter = options?.delimiter ?? COMMA + const lengthMarker = options?.lengthMarker ?? false let header = '' @@ -154,7 +156,7 @@ export function formatHeader( } // Only include delimiter if it's not the default (comma) - header += `[${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]` + header += `[${lengthMarker || ''}${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]` if (fields) { const quotedFields = fields.map(f => encodeKey(f)) diff --git a/src/types.ts b/src/types.ts index c6ff65e..62833c1 100644 --- a/src/types.ts +++ b/src/types.ts @@ -24,6 +24,12 @@ export interface EncodeOptions { * @default DELIMITERS.comma */ delimiter?: Delimiter + /** + * Optional marker to prefix array lengths in headers. + * When set to `#`, arrays render as [#N] instead of [N]. 
+ * @default false + */ + lengthMarker?: '#' | false } export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>> diff --git a/test/index.test.ts b/test/index.test.ts index 7dd02ca..72ba02f 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -731,3 +731,39 @@ describe('delimiter options', () => { }) }) }) + +describe('length marker option', () => { + it('adds length marker to primitive arrays', () => { + const obj = { tags: ['admin', 'ops', 'dev'] } + expect(encode(obj, { lengthMarker: '#' })).toBe('tags[#3]: admin,ops,dev') + }) + + it('handles empty arrays', () => { + expect(encode({ items: [] }, { lengthMarker: '#' })).toBe('items[#0]:') + }) + + it('adds length marker to tabular arrays', () => { + const obj = { + items: [ + { sku: 'A1', qty: 2, price: 9.99 }, + { sku: 'B2', qty: 1, price: 14.5 }, + ], + } + expect(encode(obj, { lengthMarker: '#' })).toBe('items[#2]{sku,qty,price}:\n A1,2,9.99\n B2,1,14.5') + }) + + it('adds length marker to nested arrays', () => { + const obj = { pairs: [['a', 'b'], ['c', 'd']] } + expect(encode(obj, { lengthMarker: '#' })).toBe('pairs[#2]:\n - [#2]: a,b\n - [#2]: c,d') + }) + + it('works with delimiter option', () => { + const obj = { tags: ['admin', 'ops', 'dev'] } + expect(encode(obj, { lengthMarker: '#', delimiter: '|' })).toBe('tags[#3|]: admin|ops|dev') + }) + + it('default is false (no length marker)', () => { + const obj = { tags: ['admin', 'ops', 'dev'] } + expect(encode(obj)).toBe('tags[3]: admin,ops,dev') + }) +})