mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 15:24:10 +08:00
feat: parse nested tabular arrays in list items with bare hyphen (spec v2.1)
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
"name": "@toon-format/cli",
|
||||
"type": "module",
|
||||
"version": "1.3.0",
|
||||
"packageManager": "pnpm@10.21.0",
|
||||
"packageManager": "pnpm@10.23.0",
|
||||
"description": "CLI for JSON ↔ TOON conversion using @toon-format/toon",
|
||||
"author": "Johann Schopplich <hello@johannschopplich.com>",
|
||||
"license": "MIT",
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"name": "@toon-format/toon",
|
||||
"type": "module",
|
||||
"version": "1.3.0",
|
||||
"packageManager": "pnpm@10.21.0",
|
||||
"packageManager": "pnpm@10.23.0",
|
||||
"description": "Token-Oriented Object Notation (TOON) – Compact, human-readable, schema-aware encoding of JSON for LLM prompts",
|
||||
"author": "Johann Schopplich <hello@johannschopplich.com>",
|
||||
"license": "MIT",
|
||||
@@ -38,6 +38,6 @@
|
||||
"test": "vitest"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@toon-format/spec": "^2.0.1"
|
||||
"@toon-format/spec": "^2.1.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -411,9 +411,45 @@ function* decodeListItemSync(
|
||||
let afterHyphen: string
|
||||
|
||||
if (line.content === LIST_ITEM_MARKER) {
|
||||
yield { type: 'startObject' }
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
// Bare list item marker: either an empty object or fields at depth +1
|
||||
const followDepth = baseDepth + 1
|
||||
const nextLine = cursor.peekSync()
|
||||
|
||||
if (!nextLine || nextLine.depth < followDepth) {
|
||||
// No fields at the next depth: treat as empty object
|
||||
yield { type: 'startObject' }
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
}
|
||||
|
||||
if (nextLine.depth === followDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||
// Fields at depth +1: parse them as an object
|
||||
yield { type: 'startObject' }
|
||||
|
||||
while (!cursor.atEndSync()) {
|
||||
const fieldLine = cursor.peekSync()
|
||||
if (!fieldLine || fieldLine.depth < followDepth) {
|
||||
break
|
||||
}
|
||||
|
||||
if (fieldLine.depth === followDepth && !fieldLine.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||
cursor.advanceSync()
|
||||
yield* decodeKeyValueSync(fieldLine.content, cursor, followDepth, options)
|
||||
}
|
||||
else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
}
|
||||
else {
|
||||
// Next line is another list item or at a different depth: treat as empty object
|
||||
yield { type: 'startObject' }
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
}
|
||||
}
|
||||
else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||
afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
|
||||
@@ -509,7 +545,7 @@ export async function* decodeStream(
|
||||
// Get first line to determine root form
|
||||
const first = await cursor.peek()
|
||||
if (!first) {
|
||||
// Empty input decodes to empty object (matches decode('') behavior)
|
||||
// Empty input decodes to empty object
|
||||
yield { type: 'startObject' }
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
@@ -770,9 +806,45 @@ async function* decodeListItemAsync(
|
||||
let afterHyphen: string
|
||||
|
||||
if (line.content === LIST_ITEM_MARKER) {
|
||||
yield { type: 'startObject' }
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
// Bare list item marker: either an empty object or fields at depth +1
|
||||
const followDepth = baseDepth + 1
|
||||
const nextLine = await cursor.peek()
|
||||
|
||||
if (!nextLine || nextLine.depth < followDepth) {
|
||||
// No fields at the next depth: treat as empty object
|
||||
yield { type: 'startObject' }
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
}
|
||||
|
||||
if (nextLine.depth === followDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||
// Fields at depth +1: parse them as an object
|
||||
yield { type: 'startObject' }
|
||||
|
||||
while (!cursor.atEnd()) {
|
||||
const fieldLine = await cursor.peek()
|
||||
if (!fieldLine || fieldLine.depth < followDepth) {
|
||||
break
|
||||
}
|
||||
|
||||
if (fieldLine.depth === followDepth && !fieldLine.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||
await cursor.advance()
|
||||
yield* decodeKeyValueAsync(fieldLine.content, cursor, followDepth, options)
|
||||
}
|
||||
else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
}
|
||||
else {
|
||||
// Next line is another list item or at a different depth: treat as empty object
|
||||
yield { type: 'startObject' }
|
||||
yield { type: 'endObject' }
|
||||
return
|
||||
}
|
||||
}
|
||||
else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||
afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
|
||||
|
||||
@@ -101,7 +101,6 @@ export function* encodeKeyValuePairLines(
|
||||
}
|
||||
}
|
||||
|
||||
// No folding applied - use standard encoding
|
||||
const encodedKey = encodeKey(key)
|
||||
|
||||
if (isJsonPrimitive(value)) {
|
||||
@@ -190,10 +189,10 @@ export function* encodeArrayOfArraysAsListItemsLines(
|
||||
export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string {
|
||||
const header = formatHeader(values.length, { key: prefix, delimiter })
|
||||
const joinedValue = encodeAndJoinPrimitives(values, delimiter)
|
||||
// Only add space if there are values
|
||||
if (values.length === 0) {
|
||||
|
||||
if (values.length === 0)
|
||||
return header
|
||||
}
|
||||
|
||||
return `${header} ${joinedValue}`
|
||||
}
|
||||
|
||||
@@ -296,57 +295,25 @@ export function* encodeObjectAsListItemLines(
|
||||
}
|
||||
|
||||
const entries = Object.entries(obj)
|
||||
const [firstKey, firstValue] = entries[0]!
|
||||
const encodedKey = encodeKey(firstKey)
|
||||
|
||||
if (isJsonPrimitive(firstValue)) {
|
||||
yield indentedListItem(depth, `${encodedKey}: ${encodePrimitive(firstValue, options.delimiter)}`, options.indent)
|
||||
}
|
||||
else if (isJsonArray(firstValue)) {
|
||||
if (isArrayOfPrimitives(firstValue)) {
|
||||
// Inline format for primitive arrays
|
||||
const arrayPropertyLine = encodeInlineArrayLine(firstValue, options.delimiter, firstKey)
|
||||
yield indentedListItem(depth, arrayPropertyLine, options.indent)
|
||||
}
|
||||
else if (isArrayOfObjects(firstValue)) {
|
||||
// Check if array of objects can use tabular format
|
||||
const header = extractTabularHeader(firstValue)
|
||||
// Compact form only when the list-item object has a single tabular array field
|
||||
if (entries.length === 1) {
|
||||
const [key, value] = entries[0]!
|
||||
|
||||
if (isJsonArray(value) && isArrayOfObjects(value)) {
|
||||
const header = extractTabularHeader(value)
|
||||
if (header) {
|
||||
// Tabular format for uniform arrays of objects
|
||||
const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter })
|
||||
const formattedHeader = formatHeader(value.length, { key, fields: header, delimiter: options.delimiter })
|
||||
yield indentedListItem(depth, formattedHeader, options.indent)
|
||||
yield* writeTabularRowsLines(firstValue, header, depth + 1, options)
|
||||
yield* writeTabularRowsLines(value, header, depth + 1, options)
|
||||
return
|
||||
}
|
||||
else {
|
||||
// Fall back to list format for non-uniform arrays of objects
|
||||
yield indentedListItem(depth, `${encodedKey}[${firstValue.length}]:`, options.indent)
|
||||
for (const item of firstValue) {
|
||||
yield* encodeObjectAsListItemLines(item, depth + 1, options)
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Complex arrays on separate lines (array of arrays, etc.)
|
||||
yield indentedListItem(depth, `${encodedKey}[${firstValue.length}]:`, options.indent)
|
||||
|
||||
// Encode array contents at depth + 1
|
||||
for (const item of firstValue) {
|
||||
yield* encodeListItemValueLines(item, depth + 1, options)
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (isJsonObject(firstValue)) {
|
||||
yield indentedListItem(depth, `${encodedKey}:`, options.indent)
|
||||
if (!isEmptyObject(firstValue)) {
|
||||
yield* encodeObjectLines(firstValue, depth + 2, options)
|
||||
}
|
||||
}
|
||||
|
||||
// Remaining entries on indented lines
|
||||
for (let i = 1; i < entries.length; i++) {
|
||||
const [key, value] = entries[i]!
|
||||
yield* encodeKeyValuePairLines(key, value, depth + 1, options)
|
||||
}
|
||||
// All other cases: emit a bare list item marker and all fields at depth + 1
|
||||
yield indentedLine(depth, LIST_ITEM_MARKER, options.indent)
|
||||
yield* encodeObjectLines(obj, depth + 1, options)
|
||||
}
|
||||
|
||||
// #endregion
|
||||
|
||||
Reference in New Issue
Block a user