diff --git a/README.md b/README.md index ed67028..9b20ad6 100644 --- a/README.md +++ b/README.md @@ -50,21 +50,25 @@ users[2]{id,name,role}: ``` ⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens - vs JSON: 15,145 💰 42.3% saved - vs XML: 17,095 💰 48.8% saved + vs JSON: 15,145 (-42.3%) + vs YAML: 13,129 (-33.4%) + vs XML: 17,095 (-48.8%) 📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens - vs JSON: 10,977 💰 58.9% saved - vs XML: 13,128 💰 65.7% saved + vs JSON: 10,977 (-58.9%) + vs YAML: 8,810 (-48.8%) + vs XML: 13,128 (-65.7%) 🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens - vs JSON: 257 💰 35.4% saved - vs XML: 271 💰 38.7% saved + vs JSON: 257 (-35.4%) + vs YAML: 197 (-15.7%) + vs XML: 271 (-38.7%) ───────────────────────────────────────────────────────────────────── -Total ████████████░░░░░░░░░░░░░ 13,418 tokens - vs JSON: 26,379 💰 49.1% saved - vs XML: 30,494 💰 56.0% saved +Total █████████████░░░░░░░░░░░░ 13,418 tokens + vs JSON: 26,379 (-49.1%) + vs YAML: 22,136 (-39.4%) + vs XML: 30,494 (-56.0%) ```
@@ -371,7 +375,7 @@ Four datasets designed to test different structural patterns: #### Evaluation Process -1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML). +1. **Format conversion**: Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`). @@ -764,6 +768,48 @@ encode(data, { lengthMarker: '#', delimiter: '|' }) // B2|1|14.5 ``` +### `decode(input: string, options?: DecodeOptions): JsonValue` + +Converts a TOON-formatted string back to JavaScript values. + +**Parameters:** + +- `input` – A TOON-formatted string to parse +- `options` – Optional decoding options: + - `indent?: number` – Expected number of spaces per indentation level (default: `2`) + - `strict?: boolean` – Enable strict validation (default: `true`) + +**Returns:** + +A JavaScript value (object, array, or primitive) representing the parsed TOON data. + +**Example:** + +```ts +import { decode } from '@byjohann/toon' + +const toon = `items[2]{sku,qty,price}: + A1,2,9.99 + B2,1,14.5` + +const data = decode(toon) +// { +// items: [ +// { sku: 'A1', qty: 2, price: 9.99 }, +// { sku: 'B2', qty: 1, price: 14.5 } +// ] +// } +``` + +**Strict Mode:** + +By default, the decoder validates input strictly: + +- **Invalid escape sequences** – Throws on `"\x"`, unterminated strings +- **Syntax errors** – Throws on missing colons, malformed headers +- **Array length mismatches** – Throws when declared length doesn't match actual count +- **Delimiter mismatches** – Throws when row delimiters don't match header + ## Notes and Limitations - Format familiarity matters as much as token count. 
TOON's tabular format requires arrays of objects with identical keys and primitive values only – when this doesn't hold (due to mixed types, non-uniform objects, or nested structures), TOON switches to list format where JSON can be cheaper at scale. @@ -785,7 +831,7 @@ Wrap your encoded data in a fenced code block (label it \`\`\`toon for clarity). For output, be more explicit. When you want the model to **generate** TOON: - **Show the expected header** (`users[N]{id,name,role}:`). The model fills rows instead of repeating keys, reducing generation errors. -- **State the rules**: 2-space indent, no trailing spaces, `[N]` matches row count. +- **State the rules:** 2-space indent, no trailing spaces, `[N]` matches row count. Here's a prompt that works for both reading and generating: @@ -850,16 +896,16 @@ Task: Return only users with role "user" as TOON. Use the same header. Set [N] t ## Ports in Other Languages -- **Elixir**: [toon_ex](https://github.com/kentaro/toon_ex) -- **PHP**: [toon-php](https://github.com/HelgeSverre/toon-php) -- **Python**: [pytoon](https://github.com/bpradana/pytoon) +- **Elixir:** [toon_ex](https://github.com/kentaro/toon_ex) +- **PHP:** [toon-php](https://github.com/HelgeSverre/toon-php) +- **Python:** [pytoon](https://github.com/bpradana/pytoon) - [python-toon](https://github.com/xaviviro/python-toon) - [toon-python](https://gitlab.com/KanTakahiro/toon-python) -- **Ruby**: [toon-ruby](https://github.com/andrepcg/toon-ruby) -- **Java**: [JToon](https://github.com/felipestanzani/JToon) -- **.NET**: [toon.NET](https://github.com/ghost1face/toon.NET) -- **Swift**: [TOONEncoder](https://github.com/mattt/TOONEncoder) -- **Go** [gotoon](https://github.com/alpkeskin/gotoon) +- **Ruby:** [toon-ruby](https://github.com/andrepcg/toon-ruby) +- **Java:** [JToon](https://github.com/felipestanzani/JToon) +- **.NET:** [toon.NET](https://github.com/ghost1face/toon.NET) +- **Swift:** [TOONEncoder](https://github.com/mattt/TOONEncoder) +- **Go:** 
[gotoon](https://github.com/alpkeskin/gotoon) ## License diff --git a/benchmarks/results/retrieval-accuracy.md b/benchmarks/results/retrieval-accuracy.md index 2e1cb37..c2c5fb1 100644 --- a/benchmarks/results/retrieval-accuracy.md +++ b/benchmarks/results/retrieval-accuracy.md @@ -159,7 +159,7 @@ Four datasets designed to test different structural patterns: #### Evaluation Process -1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML). +1. **Format conversion**: Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`). diff --git a/benchmarks/src/report.ts b/benchmarks/src/report.ts index df12691..28bcf66 100644 --- a/benchmarks/src/report.ts +++ b/benchmarks/src/report.ts @@ -248,7 +248,7 @@ ${totalQuestions} questions are generated dynamically across three categories: #### Evaluation Process -1. **Format conversion:** Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => f.format.toUpperCase()).join(', ')}). +1. **Format conversion**: Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => f.format.toUpperCase()).join(', ')}). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate with LLM-as-judge**: \`gpt-5-nano\` validates if the answer is semantically correct (e.g., \`50000\` = \`$50,000\`, \`Engineering\` = \`engineering\`, \`2025-01-01\` = \`January 1, 2025\`). 
diff --git a/src/constants.ts b/src/constants.ts index 91a4264..33ceb36 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -11,6 +11,7 @@ export const COMMA = ',' export const COLON = ':' export const SPACE = ' ' export const PIPE = '|' +export const HASH = '#' // #endregion diff --git a/src/decoders.ts b/src/decoders.ts new file mode 100644 index 0000000..142de2a --- /dev/null +++ b/src/decoders.ts @@ -0,0 +1,419 @@ +import type { LineCursor } from './scanner' +import type { + ArrayHeaderInfo, + Depth, + JsonArray, + JsonObject, + JsonPrimitive, + JsonValue, + ParsedLine, + ResolvedDecodeOptions, +} from './types' +import { + COLON, + DEFAULT_DELIMITER, + LIST_ITEM_PREFIX, +} from './constants' +import { + isArrayHeaderAfterHyphen, + isObjectFirstFieldAfterHyphen, + parseArrayHeaderLine, + parseKeyToken, + parsePrimitiveToken, + parseRowValuesToPrimitives, + splitDelimitedValues, +} from './parser' + +// #region Entry decoding + +export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecodeOptions): JsonValue { + const first = cursor.peek() + if (!first) { + throw new Error('No content to decode') + } + + // Check for root array + if (isRootArrayHeaderLine(first)) { + const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER) + if (headerInfo) { + cursor.advance() // Move past the header line + return decodeArrayFromHeader(headerInfo.header, first, cursor, 0, options) + } + } + + // Check for single primitive value + if (cursor.length === 1 && !isKeyValueLine(first)) { + return parsePrimitiveToken(first.content.trim()) + } + + // Default to object + return decodeObject(cursor, 0, options) +} + +function isRootArrayHeaderLine(line: ParsedLine): boolean { + const content = line.content.trim() + // Root array: starts with [ and has a colon + return content.startsWith('[') && content.includes(COLON) +} + +function isKeyValueLine(line: ParsedLine): boolean { + const content = line.content + // Look for unquoted colon or quoted key 
followed by colon + if (content.startsWith('"')) { + // Quoted key + let i = 1 + while (i < content.length) { + if (content[i] === '\\' && i + 1 < content.length) { + i += 2 + continue + } + if (content[i] === '"') { + // Found end of quoted key, check for colon + return content[i + 1] === COLON + } + i++ + } + return false + } + else { + // Unquoted key - look for first colon not inside quotes + return content.includes(COLON) + } +} + +// #endregion + +// #region Object decoding + +function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject { + const obj: JsonObject = {} + + while (!cursor.atEnd()) { + const line = cursor.peek() + if (!line || line.depth < baseDepth) { + break + } + + if (line.depth === baseDepth) { + const [key, value] = decodeKeyValuePair(line, cursor, baseDepth, options) + obj[key] = value + } + else { + break + } + } + + return obj +} + +function decodeKeyValuePair( + line: ParsedLine, + cursor: LineCursor, + baseDepth: Depth, + options: ResolvedDecodeOptions, +): [key: string, value: JsonValue] { + cursor.advance() + + // Check for array header first (before parsing key) + const arrayHeader = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER) + if (arrayHeader && arrayHeader.header.key) { + const value = decodeArrayFromHeader(arrayHeader.header, line, cursor, baseDepth, options) + return [arrayHeader.header.key, value] + } + + // Regular key-value pair + const { key, end } = parseKeyToken(line.content, 0) + const rest = line.content.slice(end).trim() + + // No value after colon - expect nested object or empty + if (!rest) { + const nextLine = cursor.peek() + if (nextLine && nextLine.depth > baseDepth) { + const nested = expectNestedObject(cursor, baseDepth + 1, options) + return [key, nested] + } + // Empty object + return [key, {}] + } + + // Inline primitive value + const value = parsePrimitiveToken(rest) + return [key, value] +} + +function expectNestedObject(cursor: LineCursor, nestedDepth: 
Depth, options: ResolvedDecodeOptions): JsonObject { + return decodeObject(cursor, nestedDepth, options) +} + +// #endregion + +// #region Array decoding + +function decodeArrayFromHeader( + header: ArrayHeaderInfo, + line: ParsedLine, + cursor: LineCursor, + baseDepth: Depth, + options: ResolvedDecodeOptions, +): JsonArray { + const arrayHeader = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER) + if (!arrayHeader) { + throw new Error('Invalid array header') + } + + // Inline primitive array + if (arrayHeader.inlineValues) { + // For inline arrays, cursor should already be advanced or will be by caller + return decodeInlinePrimitiveArray(header, arrayHeader.inlineValues, options) + } + + // For multi-line arrays (tabular or list), the cursor should already be positioned + // at the array header line, but we haven't advanced past it yet + + // Tabular array + if (header.fields && header.fields.length > 0) { + return decodeTabularArray(header, cursor, baseDepth, options) + } + + // List array + return decodeListArray(header, cursor, baseDepth, options) +} + +function decodeInlinePrimitiveArray( + header: ArrayHeaderInfo, + inlineValues: string, + options: ResolvedDecodeOptions, +): JsonPrimitive[] { + if (!inlineValues.trim()) { + assertExpectedCount(0, header.length, 'inline array items', options) + return [] + } + + const values = splitDelimitedValues(inlineValues, header.delimiter) + const primitives = parseRowValuesToPrimitives(values) + + assertExpectedCount(primitives.length, header.length, 'inline array items', options) + + return primitives +} + +function decodeListArray( + header: ArrayHeaderInfo, + cursor: LineCursor, + baseDepth: Depth, + options: ResolvedDecodeOptions, +): JsonValue[] { + const items: JsonValue[] = [] + const itemDepth = baseDepth + 1 + + while (!cursor.atEnd() && items.length < header.length) { + const line = cursor.peek() + if (!line || line.depth < itemDepth) { + break + } + + if (line.depth === itemDepth && 
line.content.startsWith(LIST_ITEM_PREFIX)) { + const item = decodeListItem(cursor, itemDepth, header.delimiter, options) + items.push(item) + } + else { + break + } + } + + assertExpectedCount(items.length, header.length, 'list array items', options) + + // In strict mode, check for extra items + if (options.strict && !cursor.atEnd()) { + const nextLine = cursor.peek() + if (nextLine && nextLine.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) { + throw new Error(`Expected ${header.length} list array items, but found more`) + } + } + + return items +} + +function decodeTabularArray( + header: ArrayHeaderInfo, + cursor: LineCursor, + baseDepth: Depth, + options: ResolvedDecodeOptions, +): JsonObject[] { + const objects: JsonObject[] = [] + const rowDepth = baseDepth + 1 + + while (!cursor.atEnd() && objects.length < header.length) { + const line = cursor.peek() + if (!line || line.depth < rowDepth) { + break + } + + if (line.depth === rowDepth) { + cursor.advance() + const values = splitDelimitedValues(line.content, header.delimiter) + assertExpectedCount(values.length, header.fields!.length, 'tabular row values', options) + + const primitives = parseRowValuesToPrimitives(values) + const obj: JsonObject = {} + + for (let i = 0; i < header.fields!.length; i++) { + obj[header.fields![i]!] = primitives[i]! 
/**
 * Decodes a single list item, consuming its first line from the cursor.
 *
 * The text following the list-item prefix is classified in this order:
 * 1. an array header (per `isArrayHeaderAfterHyphen`), decoded as a nested array;
 * 2. the first field of an object (per `isObjectFirstFieldAfterHyphen`);
 * 3. otherwise a primitive token.
 *
 * @param cursor - Line cursor positioned on the list item line.
 * @param baseDepth - Depth of the list item line itself.
 * @param activeDelimiter - Delimiter of the enclosing array; used as the default
 *   delimiter when parsing a nested array header.
 * @param options - Resolved decode options, forwarded to nested decoders.
 * @returns The decoded array, object, or primitive.
 * @throws {Error} If the cursor has no line left to read.
 */
function decodeListItem(
  cursor: LineCursor,
  baseDepth: Depth,
  activeDelimiter: ArrayHeaderInfo['delimiter'],
  options: ResolvedDecodeOptions,
): JsonValue {
  const line = cursor.next()
  if (!line) {
    throw new Error('Expected list item')
  }

  const afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)

  // Nested array: the delimiter is typed from the header info, so no cast is needed
  if (isArrayHeaderAfterHyphen(afterHyphen)) {
    const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter)
    if (arrayHeader) {
      return decodeArrayFromHeader(arrayHeader.header, line, cursor, baseDepth, options)
    }
    // Header-looking text that fails to parse falls through to the checks below
  }

  // Object whose first field shares the hyphen line
  if (isObjectFirstFieldAfterHyphen(afterHyphen)) {
    return decodeObjectFromListItem(line, cursor, baseDepth, options)
  }

  // Primitive value
  return parsePrimitiveToken(afterHyphen)
}
parsePrimitiveToken(afterKey) + return { key, value, followDepth: baseDepth + 1 } +} + +// #endregion + +// #region Validation + +function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void { + if (options.strict && actual !== expected) { + throw new Error(`Expected ${expected} ${what}, but got ${actual}`) + } +} + +// #endregion diff --git a/src/index.ts b/src/index.ts index 18697cd..90e8e8a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,13 +1,19 @@ import type { + DecodeOptions, EncodeOptions, + JsonValue, + ResolvedDecodeOptions, ResolvedEncodeOptions, } from './types' import { DEFAULT_DELIMITER } from './constants' +import { decodeValueFromLines } from './decoders' import { encodeValue } from './encoders' import { normalizeValue } from './normalize' +import { LineCursor, toParsedLines } from './scanner' export { DEFAULT_DELIMITER, DELIMITERS } from './constants' export type { + DecodeOptions, Delimiter, DelimiterKey, EncodeOptions, @@ -15,6 +21,7 @@ export type { JsonObject, JsonPrimitive, JsonValue, + ResolvedDecodeOptions, ResolvedEncodeOptions, } from './types' @@ -24,6 +31,18 @@ export function encode(input: unknown, options?: EncodeOptions): string { return encodeValue(normalizedValue, resolvedOptions) } +export function decode(input: string, options?: DecodeOptions): JsonValue { + const resolved = resolveDecodeOptions(options) + const lines = toParsedLines(input, resolved.indent) + + if (lines.length === 0) { + throw new Error('Cannot decode empty input') + } + + const cursor = new LineCursor(lines) + return decodeValueFromLines(cursor, resolved) +} + function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions { return { indent: options?.indent ?? 2, @@ -31,3 +50,10 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions { lengthMarker: options?.lengthMarker ?? 
/**
 * Applies defaults to the public decode options and validates them.
 *
 * @param options - Caller-supplied options; every field may be omitted.
 * @returns Options with `indent` (default `2`) and `strict` (default `true`) set.
 * @throws {RangeError} If `indent` is not a positive integer.
 */
function resolveDecodeOptions(options?: DecodeOptions): ResolvedDecodeOptions {
  const indent = options?.indent ?? 2

  // `indent` is later used as a divisor when turning leading spaces into a depth;
  // zero, negative, fractional or NaN values would yield NaN/Infinity depths and a
  // silently wrong result, so reject them up front.
  if (!Number.isInteger(indent) || indent < 1) {
    throw new RangeError(`Invalid indent: expected a positive integer, got ${indent}`)
  }

  return {
    indent,
    strict: options?.strict ?? true,
  }
}
/**
 * Parses the text between `[` and `]` of an array header.
 *
 * Accepted shape: an optional length marker (`#`), the array length as decimal
 * digits, and an optional trailing delimiter marker (tab or pipe).
 *
 * @param seg - Bracket content without the surrounding brackets, e.g. `3`, `#2|`.
 * @param defaultDelimiter - Delimiter to report when the segment carries no marker.
 * @returns The declared length, the effective delimiter, and whether `#` was present.
 * @throws {TypeError} If the length part is not a non-negative decimal integer.
 */
export function parseBracketSegment(
  seg: string,
  defaultDelimiter: Delimiter,
): { length: number, delimiter: Delimiter, hasLengthMarker: boolean } {
  let content = seg

  // Optional length marker
  const hasLengthMarker = content.startsWith(HASH)
  if (hasLengthMarker) {
    content = content.slice(1)
  }

  // Optional delimiter suffix
  let delimiter = defaultDelimiter
  if (content.endsWith(TAB)) {
    delimiter = DELIMITERS.tab
    content = content.slice(0, -1)
  }
  else if (content.endsWith(PIPE)) {
    delimiter = DELIMITERS.pipe
    content = content.slice(0, -1)
  }

  // parseInt alone would accept "3abc" (-> 3), "-1" and "2.5" (-> 2); require
  // digits only so a malformed header is rejected instead of silently truncated.
  // Surrounding whitespace is still tolerated, as it was before.
  const digits = content.trim()
  if (!/^\d+$/.test(digits)) {
    throw new TypeError(`Invalid array length: ${seg}`)
  }

  return { length: Number.parseInt(digits, 10), delimiter, hasLengthMarker }
}
/**
 * Reports whether an unquoted token should be decoded as a number.
 *
 * A token qualifies when `Number(token)` is finite and its digits carry no
 * leading zero (other than `0` itself or a `0.` decimal prefix). The
 * leading-zero rule is applied after an optional sign, so `05` and `-05` are
 * treated alike.
 *
 * @param token - Candidate token.
 * @returns `true` if the token is a numeric literal, `false` otherwise.
 */
export function isNumericLiteral(token: string): boolean {
  // Number('') and Number('   ') both coerce to 0, so blank tokens are rejected explicitly
  if (!token.trim())
    return false

  // Look past an optional sign so "-05" is rejected just like "05"
  const unsigned = token[0] === '-' || token[0] === '+' ? token.slice(1) : token

  // Must not have leading zeros (except for "0" itself or decimals like "0.5")
  if (unsigned.length > 1 && unsigned[0] === '0' && unsigned[1] !== '.') {
    return false
  }

  // Number.isFinite is false for NaN as well as for +/-Infinity
  return Number.isFinite(Number(token))
}
/**
 * Reports whether the text after a list-item hyphen starts an object field,
 * i.e. contains a colon that is not inside a double-quoted string.
 *
 * Colons inside quotes are ignored so that a quoted primitive such as `"a:b"`
 * is not mistaken for a `key: value` pair, while `"a:b": 1` and `note: "x:y"`
 * are still recognised as fields.
 *
 * @param content - Text following the list-item prefix.
 * @returns `true` if an unquoted colon is present, `false` otherwise.
 */
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
  let inQuotes = false

  for (let i = 0; i < content.length; i++) {
    const char = content[i]

    if (inQuotes && char === BACKSLASH) {
      // Skip the escaped character so an escaped quote does not close the string
      i++
      continue
    }

    if (char === DOUBLE_QUOTE) {
      inQuotes = !inQuotes
    }
    else if (char === COLON && !inQuotes) {
      return true
    }
  }

  return false
}
/**
 * Splits source text into lines annotated with indentation and depth.
 *
 * Whitespace-only lines (including the empty line produced by a trailing
 * newline) are skipped, so they never reach the decoders as empty entries.
 *
 * @param source - Full input text; lines are separated by `\n`.
 * @param indentSize - Number of spaces that make up one depth level.
 * @returns One entry per non-blank line with its raw text, count of leading
 *   spaces, content after those spaces, and computed depth. Empty when the
 *   source is blank.
 */
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
  if (!source.trim()) {
    return []
  }

  const parsed: ParsedLine[] = []

  for (const raw of source.split('\n')) {
    // Blank lines carry no key or value; passing them on would make the
    // decoders fail on empty content (e.g. for input ending in a newline).
    if (!raw.trim()) {
      continue
    }

    // Only leading spaces count as indentation
    let indent = 0
    while (indent < raw.length && raw[indent] === SPACE) {
      indent++
    }

    parsed.push({
      raw,
      indent,
      content: raw.slice(indent),
      depth: computeDepthFromIndent(indent, indentSize),
    })
  }

  return parsed
}
+ * @default true + */ + strict?: boolean +} + +export type ResolvedDecodeOptions = Readonly> + +// #endregion + +// #region Decoder parsing types + +export interface ArrayHeaderInfo { + key?: string + length: number + delimiter: Delimiter + fields?: string[] + hasLengthMarker: boolean +} + +export interface ParsedLine { + raw: string + depth: Depth + indent: number + content: string +} + +// #endregion + export type Depth = number diff --git a/test/decode.test.ts b/test/decode.test.ts new file mode 100644 index 0000000..005ee02 --- /dev/null +++ b/test/decode.test.ts @@ -0,0 +1,494 @@ +import { describe, expect, it } from 'vitest' +import { decode } from '../src/index' + +describe('primitives', () => { + it('decodes safe unquoted strings', () => { + expect(decode('hello')).toBe('hello') + expect(decode('Ada_99')).toBe('Ada_99') + }) + + it('decodes quoted strings and unescapes control characters', () => { + expect(decode('""')).toBe('') + expect(decode('"line1\\nline2"')).toBe('line1\nline2') + expect(decode('"tab\\there"')).toBe('tab\there') + expect(decode('"return\\rcarriage"')).toBe('return\rcarriage') + expect(decode('"C:\\\\Users\\\\path"')).toBe('C:\\Users\\path') + expect(decode('"say \\"hello\\""')).toBe('say "hello"') + }) + + it('decodes unicode and emoji', () => { + expect(decode('café')).toBe('café') + expect(decode('你好')).toBe('你好') + expect(decode('🚀')).toBe('🚀') + expect(decode('hello 👋 world')).toBe('hello 👋 world') + }) + + it('decodes numbers, booleans and null', () => { + expect(decode('42')).toBe(42) + expect(decode('3.14')).toBe(3.14) + expect(decode('-7')).toBe(-7) + expect(decode('true')).toBe(true) + expect(decode('false')).toBe(false) + expect(decode('null')).toBe(null) + }) + + it('respects ambiguity quoting (quoted primitives remain strings)', () => { + expect(decode('"true"')).toBe('true') + expect(decode('"false"')).toBe('false') + expect(decode('"null"')).toBe('null') + expect(decode('"42"')).toBe('42') + 
expect(decode('"-3.14"')).toBe('-3.14') + expect(decode('"1e-6"')).toBe('1e-6') + expect(decode('"05"')).toBe('05') + }) +}) + +describe('objects (simple)', () => { + it('parses objects with primitive values', () => { + const toon = 'id: 123\nname: Ada\nactive: true' + expect(decode(toon)).toEqual({ id: 123, name: 'Ada', active: true }) + }) + + it('parses null values in objects', () => { + const toon = 'id: 123\nvalue: null' + expect(decode(toon)).toEqual({ id: 123, value: null }) + }) + + it('parses empty nested object header', () => { + expect(decode('user:')).toEqual({ user: {} }) + }) + + it('parses quoted object values with special characters and escapes', () => { + expect(decode('note: "a:b"')).toEqual({ note: 'a:b' }) + expect(decode('note: "a,b"')).toEqual({ note: 'a,b' }) + expect(decode('text: "line1\\nline2"')).toEqual({ text: 'line1\nline2' }) + expect(decode('text: "say \\"hello\\""')).toEqual({ text: 'say "hello"' }) + expect(decode('text: " padded "')).toEqual({ text: ' padded ' }) + expect(decode('text: " "')).toEqual({ text: ' ' }) + expect(decode('v: "true"')).toEqual({ v: 'true' }) + expect(decode('v: "42"')).toEqual({ v: '42' }) + expect(decode('v: "-7.5"')).toEqual({ v: '-7.5' }) + }) +}) + +describe('objects (keys)', () => { + it('parses quoted keys with special characters and escapes', () => { + expect(decode('"order:id": 7')).toEqual({ 'order:id': 7 }) + expect(decode('"[index]": 5')).toEqual({ '[index]': 5 }) + expect(decode('"{key}": 5')).toEqual({ '{key}': 5 }) + expect(decode('"a,b": 1')).toEqual({ 'a,b': 1 }) + expect(decode('"full name": Ada')).toEqual({ 'full name': 'Ada' }) + expect(decode('"-lead": 1')).toEqual({ '-lead': 1 }) + expect(decode('" a ": 1')).toEqual({ ' a ': 1 }) + expect(decode('"123": x')).toEqual({ 123: 'x' }) + expect(decode('"": 1')).toEqual({ '': 1 }) + }) + + it('parses dotted keys as identifiers', () => { + expect(decode('user.name: Ada')).toEqual({ 'user.name': 'Ada' }) + expect(decode('_private: 
1')).toEqual({ _private: 1 }) + expect(decode('user_name: 1')).toEqual({ user_name: 1 }) + }) + + it('unescapes control characters and quotes in keys', () => { + expect(decode('"line\\nbreak": 1')).toEqual({ 'line\nbreak': 1 }) + expect(decode('"tab\\there": 2')).toEqual({ 'tab\there': 2 }) + expect(decode('"he said \\"hi\\"": 1')).toEqual({ 'he said "hi"': 1 }) + }) +}) + +describe('nested objects', () => { + it('parses deeply nested objects with indentation', () => { + const toon = 'a:\n b:\n c: deep' + expect(decode(toon)).toEqual({ a: { b: { c: 'deep' } } }) + }) +}) + +describe('arrays of primitives', () => { + it('parses string arrays inline', () => { + const toon = 'tags[3]: reading,gaming,coding' + expect(decode(toon)).toEqual({ tags: ['reading', 'gaming', 'coding'] }) + }) + + it('parses number arrays inline', () => { + const toon = 'nums[3]: 1,2,3' + expect(decode(toon)).toEqual({ nums: [1, 2, 3] }) + }) + + it('parses mixed primitive arrays inline', () => { + const toon = 'data[4]: x,y,true,10' + expect(decode(toon)).toEqual({ data: ['x', 'y', true, 10] }) + }) + + it('parses empty arrays', () => { + expect(decode('items[0]:')).toEqual({ items: [] }) + }) + + it('parses quoted strings in arrays including empty and whitespace-only', () => { + expect(decode('items[1]: ""')).toEqual({ items: [''] }) + expect(decode('items[3]: a,"",b')).toEqual({ items: ['a', '', 'b'] }) + expect(decode('items[2]: " "," "')).toEqual({ items: [' ', ' '] }) + }) + + it('parses strings with delimiters and structural tokens in arrays', () => { + expect(decode('items[3]: a,"b,c","d:e"')).toEqual({ items: ['a', 'b,c', 'd:e'] }) + expect(decode('items[4]: x,"true","42","-3.14"')).toEqual({ items: ['x', 'true', '42', '-3.14'] }) + expect(decode('items[3]: "[5]","- item","{key}"')).toEqual({ items: ['[5]', '- item', '{key}'] }) + }) +}) + +describe('arrays of objects (tabular and list items)', () => { + it('parses tabular arrays of uniform objects', () => { + const toon = 
'items[2]{sku,qty,price}:\n A1,2,9.99\n B2,1,14.5' + expect(decode(toon)).toEqual({ + items: [ + { sku: 'A1', qty: 2, price: 9.99 }, + { sku: 'B2', qty: 1, price: 14.5 }, + ], + }) + }) + + it('parses nulls and quoted values in tabular rows', () => { + const toon = 'items[2]{id,value}:\n 1,null\n 2,"test"' + expect(decode(toon)).toEqual({ + items: [ + { id: 1, value: null }, + { id: 2, value: 'test' }, + ], + }) + }) + + it('parses quoted header keys in tabular arrays', () => { + const toon = 'items[2]{"order:id","full name"}:\n 1,Ada\n 2,Bob' + expect(decode(toon)).toEqual({ + items: [ + { 'order:id': 1, 'full name': 'Ada' }, + { 'order:id': 2, 'full name': 'Bob' }, + ], + }) + }) + + it('parses list arrays for non-uniform objects', () => { + const toon + = 'items[2]:\n' + + ' - id: 1\n' + + ' name: First\n' + + ' - id: 2\n' + + ' name: Second\n' + + ' extra: true' + expect(decode(toon)).toEqual({ + items: [ + { id: 1, name: 'First' }, + { id: 2, name: 'Second', extra: true }, + ], + }) + }) + + it('parses objects with nested values inside list items', () => { + const toon + = 'items[1]:\n' + + ' - id: 1\n' + + ' nested:\n' + + ' x: 1' + expect(decode(toon)).toEqual({ + items: [{ id: 1, nested: { x: 1 } }], + }) + }) + + it('parses nested tabular arrays as first field on hyphen line', () => { + const toon + = 'items[1]:\n' + + ' - users[2]{id,name}:\n' + + ' 1,Ada\n' + + ' 2,Bob\n' + + ' status: active' + expect(decode(toon)).toEqual({ + items: [ + { + users: [ + { id: 1, name: 'Ada' }, + { id: 2, name: 'Bob' }, + ], + status: 'active', + }, + ], + }) + }) + + it('parses objects containing arrays (including empty arrays) in list format', () => { + const toon + = 'items[1]:\n' + + ' - name: test\n' + + ' data[0]:' + expect(decode(toon)).toEqual({ + items: [{ name: 'test', data: [] }], + }) + }) + + it('parses arrays of arrays within objects', () => { + const toon + = 'items[1]:\n' + + ' - matrix[2]:\n' + + ' - [2]: 1,2\n' + + ' - [2]: 3,4\n' + + ' name: grid' + 
expect(decode(toon)).toEqual({ + items: [{ matrix: [[1, 2], [3, 4]], name: 'grid' }], + }) + }) +}) + +describe('arrays of arrays (primitives only)', () => { + it('parses nested arrays of primitives', () => { + const toon = 'pairs[2]:\n - [2]: a,b\n - [2]: c,d' + expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] }) + }) + + it('parses quoted strings and mixed lengths in nested arrays', () => { + const toon = 'pairs[2]:\n - [2]: a,b\n - [3]: "c,d","e:f","true"' + expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c,d', 'e:f', 'true']] }) + }) + + it('parses empty inner arrays', () => { + const toon = 'pairs[2]:\n - [0]:\n - [0]:' + expect(decode(toon)).toEqual({ pairs: [[], []] }) + }) + + it('parses mixed-length inner arrays', () => { + const toon = 'pairs[2]:\n - [1]: 1\n - [2]: 2,3' + expect(decode(toon)).toEqual({ pairs: [[1], [2, 3]] }) + }) +}) + +describe('root arrays', () => { + it('parses root arrays of primitives (inline)', () => { + const toon = '[5]: x,y,"true",true,10' + expect(decode(toon)).toEqual(['x', 'y', 'true', true, 10]) + }) + + it('parses root arrays of uniform objects in tabular format', () => { + const toon = '[2]{id}:\n 1\n 2' + expect(decode(toon)).toEqual([{ id: 1 }, { id: 2 }]) + }) + + it('parses root arrays of non-uniform objects in list format', () => { + const toon = '[2]:\n - id: 1\n - id: 2\n name: Ada' + expect(decode(toon)).toEqual([{ id: 1 }, { id: 2, name: 'Ada' }]) + }) + + it('parses empty root arrays', () => { + expect(decode('[0]:')).toEqual([]) + }) + + it('parses root arrays of arrays', () => { + const toon = '[2]:\n - [2]: 1,2\n - [0]:' + expect(decode(toon)).toEqual([[1, 2], []]) + }) +}) + +describe('complex structures', () => { + it('parses mixed objects with arrays and nested objects', () => { + const toon + = 'user:\n' + + ' id: 123\n' + + ' name: Ada\n' + + ' tags[2]: reading,gaming\n' + + ' active: true\n' + + ' prefs[0]:' + expect(decode(toon)).toEqual({ + user: { + id: 123, + name: 'Ada', + tags: 
['reading', 'gaming'], + active: true, + prefs: [], + }, + }) + }) +}) + +describe('mixed arrays', () => { + it('parses arrays mixing primitives, objects and strings (list format)', () => { + const toon + = 'items[3]:\n' + + ' - 1\n' + + ' - a: 1\n' + + ' - text' + expect(decode(toon)).toEqual({ items: [1, { a: 1 }, 'text'] }) + }) + + it('parses arrays mixing objects and arrays', () => { + const toon + = 'items[2]:\n' + + ' - a: 1\n' + + ' - [2]: 1,2' + expect(decode(toon)).toEqual({ items: [{ a: 1 }, [1, 2]] }) + }) +}) + +describe('delimiter options', () => { + describe('basic delimiter usage', () => { + it.each([ + { delimiter: '\t' as const, name: 'tab', header: '[3\t]', joined: 'reading\tgaming\tcoding' }, + { delimiter: '|' as const, name: 'pipe', header: '[3|]', joined: 'reading|gaming|coding' }, + { delimiter: ',' as const, name: 'comma', header: '[3]', joined: 'reading,gaming,coding' }, + ])('parses primitive arrays with $name delimiter', ({ header, joined }) => { + const toon = `tags${header}: ${joined}` + expect(decode(toon)).toEqual({ tags: ['reading', 'gaming', 'coding'] }) + }) + + it.each([ + { delimiter: '\t' as const, name: 'tab', header: '[2\t]{sku\tqty\tprice}', rows: ['A1\t2\t9.99', 'B2\t1\t14.5'] }, + { delimiter: '|' as const, name: 'pipe', header: '[2|]{sku|qty|price}', rows: ['A1|2|9.99', 'B2|1|14.5'] }, + ])('parses tabular arrays with $name delimiter', ({ header, rows }) => { + const toon = `items${header}:\n ${rows[0]}\n ${rows[1]}` + expect(decode(toon)).toEqual({ + items: [ + { sku: 'A1', qty: 2, price: 9.99 }, + { sku: 'B2', qty: 1, price: 14.5 }, + ], + }) + }) + + it.each([ + { header: '[2\t]', inner: '[2\t]', a: 'a\tb', b: 'c\td' }, + { header: '[2|]', inner: '[2|]', a: 'a|b', b: 'c|d' }, + ])('parses nested arrays with custom delimiters', ({ header, inner, a, b }) => { + const toon = `pairs${header}:\n - ${inner}: ${a}\n - ${inner}: ${b}` + expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] }) + }) + + it.each([ + { 
header: '[3\t]', joined: 'x\ty\tz' }, + { header: '[3|]', joined: 'x|y|z' }, + ])('parses root arrays of primitives with custom delimiters', ({ header, joined }) => { + const toon = `${header}: ${joined}` + expect(decode(toon)).toEqual(['x', 'y', 'z']) + }) + + it.each([ + { header: '[2\t]{id}', rows: ['1', '2'] }, + { header: '[2|]{id}', rows: ['1', '2'] }, + ])('parses root arrays of objects with custom delimiters', ({ header, rows }) => { + const toon = `${header}:\n ${rows[0]}\n ${rows[1]}` + expect(decode(toon)).toEqual([{ id: 1 }, { id: 2 }]) + }) + }) + + describe('delimiter-aware quoting', () => { + it.each([ + { header: '[3\t]', joined: 'a\t"b\\tc"\td', expected: ['a', 'b\tc', 'd'] }, + { header: '[3|]', joined: 'a|"b|c"|d', expected: ['a', 'b|c', 'd'] }, + ])('parses values containing the active delimiter when quoted', ({ header, joined, expected }) => { + const toon = `items${header}: ${joined}` + expect(decode(toon)).toEqual({ items: expected }) + }) + + it.each([ + { header: '[2\t]', joined: 'a,b\tc,d' }, + { header: '[2|]', joined: 'a,b|c,d' }, + ])('does not split on commas when using non-comma delimiter', ({ header, joined }) => { + const toon = `items${header}: ${joined}` + expect(decode(toon)).toEqual({ items: ['a,b', 'c,d'] }) + }) + + it('parses tabular values containing the active delimiter correctly', () => { + const comma = 'items[2]{id,note}:\n 1,"a,b"\n 2,"c,d"' + expect(decode(comma)).toEqual({ items: [{ id: 1, note: 'a,b' }, { id: 2, note: 'c,d' }] }) + + const tab = 'items[2\t]{id\tnote}:\n 1\ta,b\n 2\tc,d' + expect(decode(tab)).toEqual({ items: [{ id: 1, note: 'a,b' }, { id: 2, note: 'c,d' }] }) + }) + + it('does not require quoting commas in object values when using non-comma delimiter elsewhere', () => { + expect(decode('note: a,b')).toEqual({ note: 'a,b' }) + }) + + it('parses nested array values containing the active delimiter', () => { + expect(decode('pairs[1|]:\n - [2|]: a|"b|c"')).toEqual({ pairs: [['a', 'b|c']] }) + 
expect(decode('pairs[1\t]:\n - [2\t]: a\t"b\\tc"')).toEqual({ pairs: [['a', 'b\tc']] }) + }) + }) + + describe('delimiter-independent quoting rules', () => { + it('preserves quoted ambiguity regardless of delimiter', () => { + expect(decode('items[3|]: "true"|"42"|"-3.14"')).toEqual({ items: ['true', '42', '-3.14'] }) + expect(decode('items[3\t]: "true"\t"42"\t"-3.14"')).toEqual({ items: ['true', '42', '-3.14'] }) + }) + + it('parses structural-looking strings when quoted', () => { + expect(decode('items[3|]: "[5]"|"{key}"|"- item"')).toEqual({ items: ['[5]', '{key}', '- item'] }) + expect(decode('items[3\t]: "[5]"\t"{key}"\t"- item"')).toEqual({ items: ['[5]', '{key}', '- item'] }) + }) + + it('parses tabular headers with keys containing the active delimiter', () => { + const toon = 'items[2|]{"a|b"}:\n 1\n 2' + expect(decode(toon)).toEqual({ items: [{ 'a|b': 1 }, { 'a|b': 2 }] }) + }) + }) +}) + +describe('length marker option', () => { + it('accepts length marker on primitive arrays', () => { + expect(decode('tags[#3]: reading,gaming,coding')).toEqual({ tags: ['reading', 'gaming', 'coding'] }) + }) + + it('accepts length marker on empty arrays', () => { + expect(decode('items[#0]:')).toEqual({ items: [] }) + }) + + it('accepts length marker on tabular arrays', () => { + const toon = 'items[#2]{sku,qty,price}:\n A1,2,9.99\n B2,1,14.5' + expect(decode(toon)).toEqual({ + items: [ + { sku: 'A1', qty: 2, price: 9.99 }, + { sku: 'B2', qty: 1, price: 14.5 }, + ], + }) + }) + + it('accepts length marker on nested arrays', () => { + const toon = 'pairs[#2]:\n - [#2]: a,b\n - [#2]: c,d' + expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] }) + }) + + it('works with custom delimiters and length marker', () => { + expect(decode('tags[#3|]: reading|gaming|coding')).toEqual({ tags: ['reading', 'gaming', 'coding'] }) + }) +}) + +describe('error handling', () => { + it('throws on array length mismatch (inline primitives)', () => { + const toon = 'tags[2]: a,b,c' + 
expect(() => decode(toon)).toThrow() + }) + + it('throws on array length mismatch (list format)', () => { + const toon = 'items[1]:\n - 1\n - 2' + expect(() => decode(toon)).toThrow() + }) + + it('throws when tabular row value count does not match header field count', () => { + const toon = 'items[2]{id,name}:\n 1,Ada\n 2' + expect(() => decode(toon)).toThrow() + }) + + it('throws when tabular row count does not match header length', () => { + const toon = '[1]{id}:\n 1\n 2' + expect(() => decode(toon)).toThrow() + }) + + it('throws on invalid escape sequences', () => { + expect(() => decode('"a\\x"')).toThrow() + expect(() => decode('"unterminated')).toThrow() + }) + + it('throws on missing colon in key-value context', () => { + expect(() => decode('a:\n user')).toThrow() + }) + + it('throws on delimiter mismatch', () => { + const toon = 'items[2\t]{a\tb}:\n 1,2\n 3,4' + expect(() => decode(toon)).toThrow() + }) +}) diff --git a/test/index.test.ts b/test/encode.test.ts similarity index 100% rename from test/index.test.ts rename to test/encode.test.ts