feat: lengthMarker option

This commit is contained in:
Johann Schopplich
2025-10-25 19:38:45 +02:00
parent 07feebc3b8
commit 45352879e1
6 changed files with 88 additions and 13 deletions

View File

@@ -523,6 +523,7 @@ Converts any JSON-serializable value to TOON format.
- `options` Optional encoding options:
- `indent?: number` Number of spaces per indentation level (default: `2`)
- `delimiter?: ',' | '\t' | '|'` Delimiter for array values and tabular rows (default: `','`)
- `lengthMarker?: '#' | false` Optional marker to prefix array lengths (default: `false`)
**Returns:**
@@ -605,6 +606,35 @@ items[2|]{sku|name|qty|price}:
B2|Gadget|1|14.5
```
#### Length Marker Option
The `lengthMarker` option adds an optional hash (`#`) prefix to array lengths to emphasize that the bracketed value represents a count, not an index:
```ts
import { encode } from '@byjohann/toon'
const data = {
tags: ['admin', 'ops', 'dev'],
items: [
{ sku: 'A1', qty: 2, price: 9.99 },
{ sku: 'B2', qty: 1, price: 14.5 },
],
}
console.log(encode(data, { lengthMarker: '#' }))
// tags[#3]: admin,ops,dev
// items[#2]{sku,qty,price}:
//   A1,2,9.99
//   B2,1,14.5
// Works with custom delimiters
console.log(encode(data, { lengthMarker: '#', delimiter: '|' }))
// tags[#3|]: admin|ops|dev
// items[#2|]{sku|qty|price}:
//   A1|2|9.99
//   B2|1|14.5
```
## Using TOON in LLM Prompts
When incorporating TOON into your LLM workflows:

View File

@@ -88,7 +88,7 @@ export function encodeArray(
options: ResolvedEncodeOptions,
): void {
if (value.length === 0) {
const header = formatHeader(0, key ? { key, delimiter: options.delimiter } : { delimiter: options.delimiter })
const header = formatHeader(0, { key, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
writer.push(depth, header)
return
}
@@ -135,7 +135,7 @@ export function encodeInlinePrimitiveArray(
depth: Depth,
options: ResolvedEncodeOptions,
): void {
const formatted = formatInlineArray(values, options.delimiter, prefix)
const formatted = formatInlineArray(values, options.delimiter, prefix, options.lengthMarker)
writer.push(depth, formatted)
}
@@ -150,19 +150,19 @@ export function encodeArrayOfArraysAsListItems(
depth: Depth,
options: ResolvedEncodeOptions,
): void {
const header = formatHeader(values.length, prefix ? { key: prefix, delimiter: options.delimiter } : { delimiter: options.delimiter })
const header = formatHeader(values.length, { key: prefix, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
writer.push(depth, header)
for (const arr of values) {
if (isArrayOfPrimitives(arr)) {
const inline = formatInlineArray(arr, options.delimiter)
const inline = formatInlineArray(arr, options.delimiter, undefined, options.lengthMarker)
writer.push(depth + 1, `${LIST_ITEM_PREFIX}${inline}`)
}
}
}
export function formatInlineArray(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string {
const header = formatHeader(values.length, prefix ? { key: prefix, delimiter } : { delimiter })
export function formatInlineArray(values: readonly JsonPrimitive[], delimiter: string, prefix?: string, lengthMarker?: '#' | false): string {
const header = formatHeader(values.length, { key: prefix, delimiter, lengthMarker })
const joinedValue = joinEncodedValues(values, delimiter)
// Only add space if there are values
if (values.length === 0) {
@@ -183,7 +183,7 @@ export function encodeArrayOfObjectsAsTabular(
depth: Depth,
options: ResolvedEncodeOptions,
): void {
const headerStr = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter })
const headerStr = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
writer.push(depth, `${headerStr}`)
writeTabularRows(rows, header, writer, depth + 1, options)
@@ -254,7 +254,7 @@ export function encodeMixedArrayAsListItems(
depth: Depth,
options: ResolvedEncodeOptions,
): void {
const header = formatHeader(items.length, prefix ? { key: prefix, delimiter: options.delimiter } : { delimiter: options.delimiter })
const header = formatHeader(items.length, { key: prefix, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
writer.push(depth, header)
for (const item of items) {
@@ -265,7 +265,7 @@ export function encodeMixedArrayAsListItems(
else if (isJsonArray(item)) {
// Direct array as list item
if (isArrayOfPrimitives(item)) {
const inline = formatInlineArray(item, options.delimiter)
const inline = formatInlineArray(item, options.delimiter, undefined, options.lengthMarker)
writer.push(depth + 1, `${LIST_ITEM_PREFIX}${inline}`)
}
}
@@ -294,7 +294,7 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept
else if (isJsonArray(firstValue)) {
if (isArrayOfPrimitives(firstValue)) {
// Inline format for primitive arrays
const formatted = formatInlineArray(firstValue, options.delimiter, firstKey)
const formatted = formatInlineArray(firstValue, options.delimiter, firstKey, options.lengthMarker)
writer.push(depth, `${LIST_ITEM_PREFIX}${formatted}`)
}
else if (isArrayOfObjects(firstValue)) {
@@ -302,7 +302,7 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept
const header = detectTabularHeader(firstValue)
if (header) {
// Tabular format for uniform arrays of objects
const headerStr = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter })
const headerStr = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
writer.push(depth, `${LIST_ITEM_PREFIX}${headerStr}`)
writeTabularRows(firstValue, header, writer, depth + 1, options)
}
@@ -324,7 +324,7 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept
writer.push(depth + 1, `${LIST_ITEM_PREFIX}${encodePrimitive(item, options.delimiter)}`)
}
else if (isJsonArray(item) && isArrayOfPrimitives(item)) {
const inline = formatInlineArray(item, options.delimiter)
const inline = formatInlineArray(item, options.delimiter, undefined, options.lengthMarker)
writer.push(depth + 1, `${LIST_ITEM_PREFIX}${inline}`)
}
else if (isJsonObject(item)) {

View File

@@ -28,5 +28,6 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
return {
indent: options?.indent ?? 2,
delimiter: options?.delimiter ?? DEFAULT_DELIMITER,
lengthMarker: options?.lengthMarker ?? false,
}
}

View File

@@ -141,11 +141,13 @@ export function formatHeader(
key?: string
fields?: readonly string[]
delimiter?: string
lengthMarker?: '#' | false
},
): string {
const key = options?.key
const fields = options?.fields
const delimiter = options?.delimiter ?? COMMA
const lengthMarker = options?.lengthMarker ?? false
let header = ''
@@ -154,7 +156,7 @@ export function formatHeader(
}
// Only include delimiter if it's not the default (comma)
header += `[${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]`
header += `[${lengthMarker || ''}${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]`
if (fields) {
const quotedFields = fields.map(f => encodeKey(f))

View File

@@ -24,6 +24,12 @@ export interface EncodeOptions {
* @default DELIMITERS.comma
*/
delimiter?: Delimiter
/**
* Optional marker to prefix array lengths in headers.
* When set to `'#'`, array headers render as `[#N]` instead of `[N]`.
* @default false
*/
lengthMarker?: '#' | false
}
/**
 * `EncodeOptions` with every property made required and read-only.
 */
export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>>

View File

@@ -731,3 +731,39 @@ describe('delimiter options', () => {
})
})
})
// Verifies the `lengthMarker` encode option: with `lengthMarker: '#'` every
// array header renders its length as `[#N]`; without it headers stay `[N]`.
describe('length marker option', () => {
  it('adds length marker to primitive arrays', () => {
    const obj = { tags: ['admin', 'ops', 'dev'] }
    expect(encode(obj, { lengthMarker: '#' })).toBe('tags[#3]: admin,ops,dev')
  })

  it('handles empty arrays', () => {
    expect(encode({ items: [] }, { lengthMarker: '#' })).toBe('items[#0]:')
  })

  it('adds length marker to tabular arrays', () => {
    const obj = {
      items: [
        { sku: 'A1', qty: 2, price: 9.99 },
        { sku: 'B2', qty: 1, price: 14.5 },
      ],
    }
    // Rows sit one level below the header: two spaces with the default indent of 2.
    expect(encode(obj, { lengthMarker: '#' })).toBe('items[#2]{sku,qty,price}:\n  A1,2,9.99\n  B2,1,14.5')
  })

  it('adds length marker to nested arrays', () => {
    const obj = { pairs: [['a', 'b'], ['c', 'd']] }
    // The marker applies to the outer header and to each inner inline array header.
    expect(encode(obj, { lengthMarker: '#' })).toBe('pairs[#2]:\n  - [#2]: a,b\n  - [#2]: c,d')
  })

  it('works with delimiter option', () => {
    const obj = { tags: ['admin', 'ops', 'dev'] }
    // The marker precedes the length; the non-default delimiter follows it.
    expect(encode(obj, { lengthMarker: '#', delimiter: '|' })).toBe('tags[#3|]: admin|ops|dev')
  })

  it('default is false (no length marker)', () => {
    const obj = { tags: ['admin', 'ops', 'dev'] }
    expect(encode(obj)).toBe('tags[3]: admin,ops,dev')
  })
})