feat: parse nested tabular arrays in list items with bare hyphen (spec v2.1)

This commit is contained in:
Johann Schopplich
2025-11-24 08:42:30 +01:00
parent 3389179979
commit 327bddae55
13 changed files with 739 additions and 693 deletions

View File

@@ -4,7 +4,7 @@
[![CI](https://github.com/toon-format/toon/actions/workflows/ci.yml/badge.svg)](https://github.com/toon-format/toon/actions)
[![npm version](https://img.shields.io/npm/v/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon)
[![SPEC v2.0](https://img.shields.io/badge/spec-v2.0-lightgray)](https://github.com/toon-format/spec)
[![SPEC v2.1](https://img.shields.io/badge/spec-v2.1-lightgray)](https://github.com/toon-format/spec)
[![npm downloads (total)](https://img.shields.io/npm/dt/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon)
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE)
@@ -872,12 +872,12 @@ Comprehensive guides, references, and resources to help you get the most out of
- [API Reference](https://toonformat.dev/reference/api) – TypeScript/JavaScript encode/decode API
- [Syntax Cheatsheet](https://toonformat.dev/reference/syntax-cheatsheet) – Quick format lookup
- [Specification v2.0](https://github.com/toon-format/spec/blob/main/SPEC.md) – Normative rules for implementers
- [Specification v2.1](https://github.com/toon-format/spec/blob/main/SPEC.md) – Normative rules for implementers
## Other Implementations
> [!NOTE]
> When implementing TOON in other languages, please follow the [Specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v2.0) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations.
> When implementing TOON in other languages, please follow the [Specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v2.1) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations.
### Official Implementations

View File

@@ -4,7 +4,7 @@ The TOON specification has moved to a dedicated repository: [github.com/toon-for
## Current Version
**Version 2.0** (2025-11-10)
**Version 2.1** (2025-11-23)
## Quick Links

View File

@@ -8,21 +8,21 @@
"fetch:github-repos": "tsx scripts/fetch-github-repos.ts"
},
"devDependencies": {
"@ai-sdk/anthropic": "^2.0.44",
"@ai-sdk/google": "^2.0.31",
"@ai-sdk/openai": "^2.0.65",
"@ai-sdk/anthropic": "^2.0.45",
"@ai-sdk/google": "^2.0.42",
"@ai-sdk/openai": "^2.0.71",
"@ai-sdk/provider": "^2.0.0",
"@ai-sdk/xai": "^2.0.32",
"@ai-sdk/xai": "^2.0.35",
"@clack/prompts": "^0.11.0",
"@faker-js/faker": "^10.1.0",
"ai": "^5.0.92",
"ai": "^5.0.101",
"csv-stringify": "^6.6.0",
"fast-xml-parser": "^5.3.1",
"fast-xml-parser": "^5.3.2",
"gpt-tokenizer": "^3.4.0",
"ofetch": "^1.5.1",
"p-map": "^7.0.4",
"p-queue": "^9.0.0",
"unstorage": "^1.17.2",
"p-queue": "^9.0.1",
"unstorage": "^1.17.3",
"yaml": "^2.8.1"
}
}

View File

@@ -10,7 +10,7 @@ const config: Theme = {
extends: DefaultTheme,
enhanceApp({ app }) {
app.config.globalProperties.$spec = {
version: '2.0',
version: '2.1',
}
app.component('CopyOrDownloadAsMarkdownButtons', CopyOrDownloadAsMarkdownButtons)
},

View File

@@ -20,7 +20,7 @@ hero:
text: CLI
link: /cli/
- theme: alt
text: Spec v2.0
text: Spec v2.1
link: /reference/spec
features:

View File

@@ -8,7 +8,7 @@
"preview": "vitepress preview"
},
"devDependencies": {
"unocss": "^66.5.6",
"unocss": "^66.5.9",
"vitepress": "^1.6.4",
"vitepress-plugin-llms": "^1.9.3"
}

View File

@@ -9,7 +9,7 @@ You don't need this page to *use* TOON. It's mainly for implementers and contrib
## Current Version
**Spec v{{ $spec.version }}** (2025-11-10) is the current stable version.
**Spec v{{ $spec.version }}** (2025-11-23) is the current stable version.
The spec defines a provisional media type and file extension in §18.2:

View File

@@ -3,7 +3,7 @@
"type": "module",
"version": "1.3.0",
"private": true,
"packageManager": "pnpm@10.21.0",
"packageManager": "pnpm@10.23.0",
"scripts": {
"build": "pnpm -r --filter=./packages/** run build",
"automd": "automd",
@@ -18,13 +18,13 @@
},
"devDependencies": {
"@antfu/eslint-config": "^6.2.0",
"@types/node": "^24.10.0",
"@types/node": "^24.10.1",
"automd": "^0.4.2",
"bumpp": "^10.3.1",
"eslint": "^9.39.1",
"tsdown": "^0.16.2",
"tsdown": "^0.16.6",
"tsx": "^4.20.6",
"typescript": "^5.9.3",
"vitest": "^4.0.8"
"vitest": "^4.0.13"
}
}

View File

@@ -2,7 +2,7 @@
"name": "@toon-format/cli",
"type": "module",
"version": "1.3.0",
"packageManager": "pnpm@10.21.0",
"packageManager": "pnpm@10.23.0",
"description": "CLI for JSON ↔ TOON conversion using @toon-format/toon",
"author": "Johann Schopplich <hello@johannschopplich.com>",
"license": "MIT",

View File

@@ -2,7 +2,7 @@
"name": "@toon-format/toon",
"type": "module",
"version": "1.3.0",
"packageManager": "pnpm@10.21.0",
"packageManager": "pnpm@10.23.0",
"description": "Token-Oriented Object Notation (TOON) Compact, human-readable, schema-aware encoding of JSON for LLM prompts",
"author": "Johann Schopplich <hello@johannschopplich.com>",
"license": "MIT",
@@ -38,6 +38,6 @@
"test": "vitest"
},
"devDependencies": {
"@toon-format/spec": "^2.0.1"
"@toon-format/spec": "^2.1.0"
}
}

View File

@@ -411,9 +411,45 @@ function* decodeListItemSync(
let afterHyphen: string
if (line.content === LIST_ITEM_MARKER) {
yield { type: 'startObject' }
yield { type: 'endObject' }
return
// Bare list item marker: either an empty object or fields at depth +1
const followDepth = baseDepth + 1
const nextLine = cursor.peekSync()
if (!nextLine || nextLine.depth < followDepth) {
// No fields at the next depth: treat as empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
}
if (nextLine.depth === followDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
// Fields at depth +1: parse them as an object
yield { type: 'startObject' }
while (!cursor.atEndSync()) {
const fieldLine = cursor.peekSync()
if (!fieldLine || fieldLine.depth < followDepth) {
break
}
if (fieldLine.depth === followDepth && !fieldLine.content.startsWith(LIST_ITEM_PREFIX)) {
cursor.advanceSync()
yield* decodeKeyValueSync(fieldLine.content, cursor, followDepth, options)
}
else {
break
}
}
yield { type: 'endObject' }
return
}
else {
// Next line is another list item or at a different depth: treat as empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
}
}
else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
@@ -509,7 +545,7 @@ export async function* decodeStream(
// Get first line to determine root form
const first = await cursor.peek()
if (!first) {
// Empty input decodes to empty object (matches decode('') behavior)
// Empty input decodes to empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
@@ -770,9 +806,45 @@ async function* decodeListItemAsync(
let afterHyphen: string
if (line.content === LIST_ITEM_MARKER) {
yield { type: 'startObject' }
yield { type: 'endObject' }
return
// Bare list item marker: either an empty object or fields at depth +1
const followDepth = baseDepth + 1
const nextLine = await cursor.peek()
if (!nextLine || nextLine.depth < followDepth) {
// No fields at the next depth: treat as empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
}
if (nextLine.depth === followDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
// Fields at depth +1: parse them as an object
yield { type: 'startObject' }
while (!cursor.atEnd()) {
const fieldLine = await cursor.peek()
if (!fieldLine || fieldLine.depth < followDepth) {
break
}
if (fieldLine.depth === followDepth && !fieldLine.content.startsWith(LIST_ITEM_PREFIX)) {
await cursor.advance()
yield* decodeKeyValueAsync(fieldLine.content, cursor, followDepth, options)
}
else {
break
}
}
yield { type: 'endObject' }
return
}
else {
// Next line is another list item or at a different depth: treat as empty object
yield { type: 'startObject' }
yield { type: 'endObject' }
return
}
}
else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)

View File

@@ -101,7 +101,6 @@ export function* encodeKeyValuePairLines(
}
}
// No folding applied - use standard encoding
const encodedKey = encodeKey(key)
if (isJsonPrimitive(value)) {
@@ -190,10 +189,10 @@ export function* encodeArrayOfArraysAsListItemsLines(
/**
 * Encodes an array of primitives as a single inline array line.
 *
 * The header is produced by `formatHeader` from the array length, the optional
 * key `prefix`, and the `delimiter`. For an empty array the header alone is
 * returned, so no trailing space follows it; otherwise the joined values are
 * appended after a single space.
 *
 * @param values - Primitive values to encode on one line.
 * @param delimiter - Delimiter passed to both the header formatter and the value joiner.
 * @param prefix - Optional key forwarded to `formatHeader` as `key`.
 * @returns The header, followed by a space and the joined values when `values` is non-empty.
 */
export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string {
  const header = formatHeader(values.length, { key: prefix, delimiter })

  // Only add space if there are values
  if (values.length === 0)
    return header

  // Join lazily: the empty-array path above never needs the joined string
  return `${header} ${encodeAndJoinPrimitives(values, delimiter)}`
}
@@ -296,57 +295,25 @@ export function* encodeObjectAsListItemLines(
}
const entries = Object.entries(obj)
const [firstKey, firstValue] = entries[0]!
const encodedKey = encodeKey(firstKey)
if (isJsonPrimitive(firstValue)) {
yield indentedListItem(depth, `${encodedKey}: ${encodePrimitive(firstValue, options.delimiter)}`, options.indent)
}
else if (isJsonArray(firstValue)) {
if (isArrayOfPrimitives(firstValue)) {
// Inline format for primitive arrays
const arrayPropertyLine = encodeInlineArrayLine(firstValue, options.delimiter, firstKey)
yield indentedListItem(depth, arrayPropertyLine, options.indent)
}
else if (isArrayOfObjects(firstValue)) {
// Check if array of objects can use tabular format
const header = extractTabularHeader(firstValue)
// Compact form only when the list-item object has a single tabular array field
if (entries.length === 1) {
const [key, value] = entries[0]!
if (isJsonArray(value) && isArrayOfObjects(value)) {
const header = extractTabularHeader(value)
if (header) {
// Tabular format for uniform arrays of objects
const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter })
const formattedHeader = formatHeader(value.length, { key, fields: header, delimiter: options.delimiter })
yield indentedListItem(depth, formattedHeader, options.indent)
yield* writeTabularRowsLines(firstValue, header, depth + 1, options)
yield* writeTabularRowsLines(value, header, depth + 1, options)
return
}
else {
// Fall back to list format for non-uniform arrays of objects
yield indentedListItem(depth, `${encodedKey}[${firstValue.length}]:`, options.indent)
for (const item of firstValue) {
yield* encodeObjectAsListItemLines(item, depth + 1, options)
}
}
}
else {
// Complex arrays on separate lines (array of arrays, etc.)
yield indentedListItem(depth, `${encodedKey}[${firstValue.length}]:`, options.indent)
// Encode array contents at depth + 1
for (const item of firstValue) {
yield* encodeListItemValueLines(item, depth + 1, options)
}
}
}
else if (isJsonObject(firstValue)) {
yield indentedListItem(depth, `${encodedKey}:`, options.indent)
if (!isEmptyObject(firstValue)) {
yield* encodeObjectLines(firstValue, depth + 2, options)
}
}
// Remaining entries on indented lines
for (let i = 1; i < entries.length; i++) {
const [key, value] = entries[i]!
yield* encodeKeyValuePairLines(key, value, depth + 1, options)
}
// All other cases: emit a bare list item marker and all fields at depth + 1
yield indentedLine(depth, LIST_ITEM_MARKER, options.indent)
yield* encodeObjectLines(obj, depth + 1, options)
}
// #endregion

1237
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff