feat: parse nested tabular arrays in list items with bare hyphen (spec v2.1)

This commit is contained in:
Johann Schopplich
2025-11-24 08:42:30 +01:00
parent 3389179979
commit 327bddae55
13 changed files with 739 additions and 693 deletions

View File

@@ -2,7 +2,7 @@
"name": "@toon-format/cli",
"type": "module",
"version": "1.3.0",
"packageManager": "pnpm@10.21.0",
"packageManager": "pnpm@10.23.0",
"description": "CLI for JSON ↔ TOON conversion using @toon-format/toon",
"author": "Johann Schopplich <hello@johannschopplich.com>",
"license": "MIT",

View File

@@ -2,7 +2,7 @@
"name": "@toon-format/toon",
"type": "module",
"version": "1.3.0",
"packageManager": "pnpm@10.21.0",
"packageManager": "pnpm@10.23.0",
"description": "Token-Oriented Object Notation (TOON) Compact, human-readable, schema-aware encoding of JSON for LLM prompts",
"author": "Johann Schopplich <hello@johannschopplich.com>",
"license": "MIT",
@@ -38,6 +38,6 @@
"test": "vitest"
},
"devDependencies": {
"@toon-format/spec": "^2.0.1"
"@toon-format/spec": "^2.1.0"
}
}

View File

@@ -411,9 +411,45 @@ function* decodeListItemSync(
let afterHyphen: string
if (line.content === LIST_ITEM_MARKER) {
yield { type: 'startObject' }
yield { type: 'endObject' }
return
// Bare list item marker: either an empty object or fields at depth +1
const followDepth = baseDepth + 1
const nextLine = cursor.peekSync()
if (!nextLine || nextLine.depth < followDepth) {
// No fields at the next depth: treat as empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
}
if (nextLine.depth === followDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
// Fields at depth +1: parse them as an object
yield { type: 'startObject' }
while (!cursor.atEndSync()) {
const fieldLine = cursor.peekSync()
if (!fieldLine || fieldLine.depth < followDepth) {
break
}
if (fieldLine.depth === followDepth && !fieldLine.content.startsWith(LIST_ITEM_PREFIX)) {
cursor.advanceSync()
yield* decodeKeyValueSync(fieldLine.content, cursor, followDepth, options)
}
else {
break
}
}
yield { type: 'endObject' }
return
}
else {
// Next line is another list item or at a different depth: treat as empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
}
}
else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
@@ -509,7 +545,7 @@ export async function* decodeStream(
// Get first line to determine root form
const first = await cursor.peek()
if (!first) {
// Empty input decodes to empty object (matches decode('') behavior)
// Empty input decodes to empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
@@ -770,9 +806,45 @@ async function* decodeListItemAsync(
let afterHyphen: string
if (line.content === LIST_ITEM_MARKER) {
yield { type: 'startObject' }
yield { type: 'endObject' }
return
// Bare list item marker: either an empty object or fields at depth +1
const followDepth = baseDepth + 1
const nextLine = await cursor.peek()
if (!nextLine || nextLine.depth < followDepth) {
// No fields at the next depth: treat as empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
}
if (nextLine.depth === followDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
// Fields at depth +1: parse them as an object
yield { type: 'startObject' }
while (!cursor.atEnd()) {
const fieldLine = await cursor.peek()
if (!fieldLine || fieldLine.depth < followDepth) {
break
}
if (fieldLine.depth === followDepth && !fieldLine.content.startsWith(LIST_ITEM_PREFIX)) {
await cursor.advance()
yield* decodeKeyValueAsync(fieldLine.content, cursor, followDepth, options)
}
else {
break
}
}
yield { type: 'endObject' }
return
}
else {
// Next line is another list item or at a different depth: treat as empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
}
}
else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)

View File

@@ -101,7 +101,6 @@ export function* encodeKeyValuePairLines(
}
}
// No folding applied - use standard encoding
const encodedKey = encodeKey(key)
if (isJsonPrimitive(value)) {
@@ -190,10 +189,10 @@ export function* encodeArrayOfArraysAsListItemsLines(
/**
 * Encodes an array of primitives as a single inline line: the array header,
 * followed by the delimiter-joined values when there are any.
 *
 * @param values - Primitive values to place on the line.
 * @param delimiter - Delimiter passed to both the header formatter and the value joiner.
 * @param prefix - Optional key forwarded to the header formatter as `key`.
 * @returns The header alone for an empty array; otherwise `"<header> <joined values>"`.
 */
export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string {
  const header = formatHeader(values.length, { key: prefix, delimiter })
  const joinedValue = encodeAndJoinPrimitives(values, delimiter)

  // Only add the separating space (and the values) if there are values
  if (values.length === 0)
    return header

  return `${header} ${joinedValue}`
}
@@ -296,57 +295,25 @@ export function* encodeObjectAsListItemLines(
}
const entries = Object.entries(obj)
const [firstKey, firstValue] = entries[0]!
const encodedKey = encodeKey(firstKey)
if (isJsonPrimitive(firstValue)) {
yield indentedListItem(depth, `${encodedKey}: ${encodePrimitive(firstValue, options.delimiter)}`, options.indent)
}
else if (isJsonArray(firstValue)) {
if (isArrayOfPrimitives(firstValue)) {
// Inline format for primitive arrays
const arrayPropertyLine = encodeInlineArrayLine(firstValue, options.delimiter, firstKey)
yield indentedListItem(depth, arrayPropertyLine, options.indent)
}
else if (isArrayOfObjects(firstValue)) {
// Check if array of objects can use tabular format
const header = extractTabularHeader(firstValue)
// Compact form only when the list-item object has a single tabular array field
if (entries.length === 1) {
const [key, value] = entries[0]!
if (isJsonArray(value) && isArrayOfObjects(value)) {
const header = extractTabularHeader(value)
if (header) {
// Tabular format for uniform arrays of objects
const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter })
const formattedHeader = formatHeader(value.length, { key, fields: header, delimiter: options.delimiter })
yield indentedListItem(depth, formattedHeader, options.indent)
yield* writeTabularRowsLines(firstValue, header, depth + 1, options)
yield* writeTabularRowsLines(value, header, depth + 1, options)
return
}
else {
// Fall back to list format for non-uniform arrays of objects
yield indentedListItem(depth, `${encodedKey}[${firstValue.length}]:`, options.indent)
for (const item of firstValue) {
yield* encodeObjectAsListItemLines(item, depth + 1, options)
}
}
}
else {
// Complex arrays on separate lines (array of arrays, etc.)
yield indentedListItem(depth, `${encodedKey}[${firstValue.length}]:`, options.indent)
// Encode array contents at depth + 1
for (const item of firstValue) {
yield* encodeListItemValueLines(item, depth + 1, options)
}
}
}
else if (isJsonObject(firstValue)) {
yield indentedListItem(depth, `${encodedKey}:`, options.indent)
if (!isEmptyObject(firstValue)) {
yield* encodeObjectLines(firstValue, depth + 2, options)
}
}
// Remaining entries on indented lines
for (let i = 1; i < entries.length; i++) {
const [key, value] = entries[i]!
yield* encodeKeyValuePairLines(key, value, depth + 1, options)
}
// All other cases: emit a bare list item marker and all fields at depth + 1
yield indentedLine(depth, LIST_ITEM_MARKER, options.indent)
yield* encodeObjectLines(obj, depth + 1, options)
}
// #endregion