mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 15:24:10 +08:00
feat: decode method (#10)
This commit is contained in:
84
README.md
84
README.md
@@ -50,21 +50,25 @@ users[2]{id,name,role}:
|
||||
|
||||
```
|
||||
⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens
|
||||
vs JSON: 15,145 💰 42.3% saved
|
||||
vs XML: 17,095 💰 48.8% saved
|
||||
vs JSON: 15,145 (-42.3%)
|
||||
vs YAML: 13,129 (-33.4%)
|
||||
vs XML: 17,095 (-48.8%)
|
||||
|
||||
📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens
|
||||
vs JSON: 10,977 💰 58.9% saved
|
||||
vs XML: 13,128 💰 65.7% saved
|
||||
vs JSON: 10,977 (-58.9%)
|
||||
vs YAML: 8,810 (-48.8%)
|
||||
vs XML: 13,128 (-65.7%)
|
||||
|
||||
🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens
|
||||
vs JSON: 257 💰 35.4% saved
|
||||
vs XML: 271 💰 38.7% saved
|
||||
vs JSON: 257 (-35.4%)
|
||||
vs YAML: 197 (-15.7%)
|
||||
vs XML: 271 (-38.7%)
|
||||
|
||||
─────────────────────────────────────────────────────────────────────
|
||||
Total ████████████░░░░░░░░░░░░░ 13,418 tokens
|
||||
vs JSON: 26,379 💰 49.1% saved
|
||||
vs XML: 30,494 💰 56.0% saved
|
||||
Total █████████████░░░░░░░░░░░░ 13,418 tokens
|
||||
vs JSON: 26,379 (-49.1%)
|
||||
vs YAML: 22,136 (-39.4%)
|
||||
vs XML: 30,494 (-56.0%)
|
||||
```
|
||||
|
||||
<details>
|
||||
@@ -371,7 +375,7 @@ Four datasets designed to test different structural patterns:
|
||||
|
||||
#### Evaluation Process
|
||||
|
||||
1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
|
||||
1. **Format conversion**: Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
|
||||
2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
|
||||
3. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`).
|
||||
|
||||
@@ -764,6 +768,48 @@ encode(data, { lengthMarker: '#', delimiter: '|' })
|
||||
// B2|1|14.5
|
||||
```
|
||||
|
||||
### `decode(input: string, options?: DecodeOptions): JsonValue`
|
||||
|
||||
Converts a TOON-formatted string back to JavaScript values.
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- `input` – A TOON-formatted string to parse
|
||||
- `options` – Optional decoding options:
|
||||
- `indent?: number` – Expected number of spaces per indentation level (default: `2`)
|
||||
- `strict?: boolean` – Enable strict validation (default: `true`)
|
||||
|
||||
**Returns:**
|
||||
|
||||
A JavaScript value (object, array, or primitive) representing the parsed TOON data.
|
||||
|
||||
**Example:**
|
||||
|
||||
```ts
|
||||
import { decode } from '@byjohann/toon'
|
||||
|
||||
const toon = `items[2]{sku,qty,price}:
|
||||
A1,2,9.99
|
||||
B2,1,14.5`
|
||||
|
||||
const data = decode(toon)
|
||||
// {
|
||||
// items: [
|
||||
// { sku: 'A1', qty: 2, price: 9.99 },
|
||||
// { sku: 'B2', qty: 1, price: 14.5 }
|
||||
// ]
|
||||
// }
|
||||
```
|
||||
|
||||
**Strict Mode:**
|
||||
|
||||
By default, the decoder validates input strictly:
|
||||
|
||||
- **Invalid escape sequences** – Throws on `"\x"`, unterminated strings
|
||||
- **Syntax errors** – Throws on missing colons, malformed headers
|
||||
- **Array length mismatches** – Throws when declared length doesn't match actual count
|
||||
- **Delimiter mismatches** – Throws when row delimiters don't match header
|
||||
|
||||
## Notes and Limitations
|
||||
|
||||
- Format familiarity matters as much as token count. TOON's tabular format requires arrays of objects with identical keys and primitive values only – when this doesn't hold (due to mixed types, non-uniform objects, or nested structures), TOON switches to list format where JSON can be cheaper at scale.
|
||||
@@ -785,7 +831,7 @@ Wrap your encoded data in a fenced code block (label it \`\`\`toon for clarity).
|
||||
For output, be more explicit. When you want the model to **generate** TOON:
|
||||
|
||||
- **Show the expected header** (`users[N]{id,name,role}:`). The model fills rows instead of repeating keys, reducing generation errors.
|
||||
- **State the rules**: 2-space indent, no trailing spaces, `[N]` matches row count.
|
||||
- **State the rules:** 2-space indent, no trailing spaces, `[N]` matches row count.
|
||||
|
||||
Here's a prompt that works for both reading and generating:
|
||||
|
||||
@@ -850,16 +896,16 @@ Task: Return only users with role "user" as TOON. Use the same header. Set [N] t
|
||||
|
||||
## Ports in Other Languages
|
||||
|
||||
- **Elixir**: [toon_ex](https://github.com/kentaro/toon_ex)
|
||||
- **PHP**: [toon-php](https://github.com/HelgeSverre/toon-php)
|
||||
- **Python**: [pytoon](https://github.com/bpradana/pytoon)
|
||||
- **Elixir:** [toon_ex](https://github.com/kentaro/toon_ex)
|
||||
- **PHP:** [toon-php](https://github.com/HelgeSverre/toon-php)
|
||||
- **Python:** [pytoon](https://github.com/bpradana/pytoon)
|
||||
- [python-toon](https://github.com/xaviviro/python-toon)
|
||||
- [toon-python](https://gitlab.com/KanTakahiro/toon-python)
|
||||
- **Ruby**: [toon-ruby](https://github.com/andrepcg/toon-ruby)
|
||||
- **Java**: [JToon](https://github.com/felipestanzani/JToon)
|
||||
- **.NET**: [toon.NET](https://github.com/ghost1face/toon.NET)
|
||||
- **Swift**: [TOONEncoder](https://github.com/mattt/TOONEncoder)
|
||||
- **Go** [gotoon](https://github.com/alpkeskin/gotoon)
|
||||
- **Ruby:** [toon-ruby](https://github.com/andrepcg/toon-ruby)
|
||||
- **Java:** [JToon](https://github.com/felipestanzani/JToon)
|
||||
- **.NET:** [toon.NET](https://github.com/ghost1face/toon.NET)
|
||||
- **Swift:** [TOONEncoder](https://github.com/mattt/TOONEncoder)
|
||||
- **Go:** [gotoon](https://github.com/alpkeskin/gotoon)
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -159,7 +159,7 @@ Four datasets designed to test different structural patterns:
|
||||
|
||||
#### Evaluation Process
|
||||
|
||||
1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
|
||||
1. **Format conversion**: Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
|
||||
2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
|
||||
3. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`).
|
||||
|
||||
|
||||
@@ -248,7 +248,7 @@ ${totalQuestions} questions are generated dynamically across three categories:
|
||||
|
||||
#### Evaluation Process
|
||||
|
||||
1. **Format conversion:** Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => f.format.toUpperCase()).join(', ')}).
|
||||
1. **Format conversion**: Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => f.format.toUpperCase()).join(', ')}).
|
||||
2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
|
||||
3. **Validate with LLM-as-judge**: \`gpt-5-nano\` validates if the answer is semantically correct (e.g., \`50000\` = \`$50,000\`, \`Engineering\` = \`engineering\`, \`2025-01-01\` = \`January 1, 2025\`).
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ export const COMMA = ','
|
||||
export const COLON = ':'
|
||||
export const SPACE = ' '
|
||||
export const PIPE = '|'
|
||||
export const HASH = '#'
|
||||
|
||||
// #endregion
|
||||
|
||||
|
||||
419
src/decoders.ts
Normal file
419
src/decoders.ts
Normal file
@@ -0,0 +1,419 @@
|
||||
import type { LineCursor } from './scanner'
|
||||
import type {
|
||||
ArrayHeaderInfo,
|
||||
Depth,
|
||||
JsonArray,
|
||||
JsonObject,
|
||||
JsonPrimitive,
|
||||
JsonValue,
|
||||
ParsedLine,
|
||||
ResolvedDecodeOptions,
|
||||
} from './types'
|
||||
import {
|
||||
COLON,
|
||||
DEFAULT_DELIMITER,
|
||||
LIST_ITEM_PREFIX,
|
||||
} from './constants'
|
||||
import {
|
||||
isArrayHeaderAfterHyphen,
|
||||
isObjectFirstFieldAfterHyphen,
|
||||
parseArrayHeaderLine,
|
||||
parseKeyToken,
|
||||
parsePrimitiveToken,
|
||||
parseRowValuesToPrimitives,
|
||||
splitDelimitedValues,
|
||||
} from './parser'
|
||||
|
||||
// #region Entry decoding
|
||||
|
||||
/**
 * Decodes the root value of a document from its parsed lines.
 *
 * The first line decides the shape of the result:
 * - a line starting with `[` that parses as an array header yields a root array,
 * - a single line that is not a key-value pair yields a primitive,
 * - anything else is decoded as an object at depth 0.
 *
 * @param cursor - Cursor over the parsed lines of the document.
 * @param options - Resolved decode options, forwarded to the nested decoders.
 * @returns The decoded root value.
 * @throws Error when the cursor has no line to peek at.
 */
export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecodeOptions): JsonValue {
  const head = cursor.peek()
  if (!head) {
    throw new Error('No content to decode')
  }

  // Only attempt the (more expensive) header parse when the line looks like a root array
  const rootHeader = isRootArrayHeaderLine(head)
    ? parseArrayHeaderLine(head.content, DEFAULT_DELIMITER)
    : undefined
  if (rootHeader) {
    cursor.advance() // step past the header line before reading the array body
    return decodeArrayFromHeader(rootHeader.header, head, cursor, 0, options)
  }

  const isLonePrimitive = cursor.length === 1 && !isKeyValueLine(head)
  return isLonePrimitive
    ? parsePrimitiveToken(head.content.trim())
    : decodeObject(cursor, 0, options)
}
|
||||
|
||||
/**
 * Cheap pre-check for a root array header: the trimmed line must open with `[`
 * and contain a colon somewhere.
 */
function isRootArrayHeaderLine(line: ParsedLine): boolean {
  const trimmed = line.content.trim()
  if (!trimmed.startsWith('[')) {
    return false
  }
  return trimmed.includes(COLON)
}
|
||||
|
||||
/**
 * Reports whether a line looks like `key: value`.
 *
 * For a line that begins with a double quote, the quoted key is scanned
 * (honouring backslash escapes) and the character right after its closing
 * quote must be a colon. For any other line, the presence of a colon anywhere
 * is enough.
 */
function isKeyValueLine(line: ParsedLine): boolean {
  const text = line.content

  if (!text.startsWith('"')) {
    // Unquoted key: any colon on the line counts
    return text.includes(COLON)
  }

  for (let pos = 1; pos < text.length; pos++) {
    const ch = text[pos]
    if (ch === '\\' && pos + 1 < text.length) {
      pos++ // skip the escaped character; the loop increment skips the backslash
      continue
    }
    if (ch === '"') {
      // End of the quoted key: it is a key only when a colon follows immediately
      return text[pos + 1] === COLON
    }
  }

  // Quote never closed
  return false
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Object decoding
|
||||
|
||||
/**
 * Decodes consecutive key-value lines at exactly `baseDepth` into an object.
 *
 * Reading stops at the end of input or at the first line whose depth differs
 * from `baseDepth`; that line is left on the cursor for the caller.
 * A key that appears more than once keeps its last value.
 *
 * @param cursor - Cursor positioned on the first candidate field line.
 * @param baseDepth - Depth at which this object's fields live.
 * @param options - Resolved decode options, forwarded to nested decoders.
 * @returns The decoded object (empty when no line matched).
 */
function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  const obj: JsonObject = {}

  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth !== baseDepth) {
      break
    }

    const [key, value] = decodeKeyValuePair(line, cursor, baseDepth, options)
    if (key === '__proto__') {
      // Plain assignment would go through the inherited `__proto__` setter and
      // either drop the field or swap the object's prototype. Define it as an
      // ordinary own property instead, matching what JSON.parse produces.
      Object.defineProperty(obj, key, { value, enumerable: true, writable: true, configurable: true })
    }
    else {
      obj[key] = value
    }
  }

  return obj
}
|
||||
|
||||
/**
 * Decodes one `key: ...` line (already peeked by the caller) into a key/value tuple.
 *
 * The cursor is advanced past `line` first. The value is then, in order of
 * precedence: an array (when the line parses as a keyed array header), an
 * inline primitive (when text follows the colon), a nested object (when the
 * next line is deeper than `baseDepth`), or an empty object.
 *
 * @param line - The line to decode.
 * @param cursor - Cursor currently positioned on `line`.
 * @param baseDepth - Depth of `line`; nested content is read at `baseDepth + 1`.
 * @param options - Resolved decode options.
 */
function decodeKeyValuePair(
  line: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): [key: string, value: JsonValue] {
  cursor.advance()

  // Array headers are tried before plain key parsing
  const parsedHeader = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER)
  const arrayKey = parsedHeader?.header.key
  if (parsedHeader && arrayKey) {
    return [arrayKey, decodeArrayFromHeader(parsedHeader.header, line, cursor, baseDepth, options)]
  }

  const { key, end } = parseKeyToken(line.content, 0)
  const inlineText = line.content.slice(end).trim()
  if (inlineText) {
    return [key, parsePrimitiveToken(inlineText)]
  }

  // Nothing after the colon: either deeper lines form a nested object, or the value is `{}`
  const upcoming = cursor.peek()
  if (upcoming && upcoming.depth > baseDepth) {
    return [key, expectNestedObject(cursor, baseDepth + 1, options)]
  }
  return [key, {}]
}
|
||||
|
||||
/**
 * Reads a nested object whose fields sit at `nestedDepth`.
 * Thin alias for `decodeObject`, kept so call sites state their intent.
 */
function expectNestedObject(cursor: LineCursor, nestedDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  return decodeObject(cursor, nestedDepth, options)
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Array decoding
|
||||
|
||||
/**
 * Decodes the array announced by an already-parsed header.
 *
 * `line.content` is parsed again only to recover the text after the colon.
 * If that text is non-empty the array is inline; otherwise the body is read
 * from the following lines, as tabular rows when the header lists fields and
 * as a list otherwise.
 *
 * @param header - Header info to decode against (length, delimiter, fields).
 * @param line - The line the header came from; callers have already moved the cursor past it.
 * @param cursor - Cursor positioned on the first line after the header.
 * @param baseDepth - Depth of the header line; body lines are expected one level deeper.
 * @param options - Resolved decode options.
 * @throws Error when `line.content` does not parse as an array header.
 */
function decodeArrayFromHeader(
  header: ArrayHeaderInfo,
  line: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonArray {
  const reparsed = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER)
  if (!reparsed) {
    throw new Error('Invalid array header')
  }

  const { inlineValues } = reparsed
  if (inlineValues) {
    return decodeInlinePrimitiveArray(header, inlineValues, options)
  }

  const isTabular = header.fields && header.fields.length > 0
  return isTabular
    ? decodeTabularArray(header, cursor, baseDepth, options)
    : decodeListArray(header, cursor, baseDepth, options)
}
|
||||
|
||||
/**
 * Decodes the values written on the header line itself (`key[N]: a,b,c`).
 *
 * @param header - Supplies the delimiter and the declared length.
 * @param inlineValues - Raw text found after the header's colon.
 * @param options - Resolved decode options; in strict mode the item count must equal `header.length`.
 * @returns The parsed primitives, or `[]` when `inlineValues` is blank.
 */
function decodeInlinePrimitiveArray(
  header: ArrayHeaderInfo,
  inlineValues: string,
  options: ResolvedDecodeOptions,
): JsonPrimitive[] {
  if (inlineValues.trim() === '') {
    assertExpectedCount(0, header.length, 'inline array items', options)
    return []
  }

  const items = parseRowValuesToPrimitives(splitDelimitedValues(inlineValues, header.delimiter))
  assertExpectedCount(items.length, header.length, 'inline array items', options)
  return items
}
|
||||
|
||||
/**
 * Decodes a list-form array: one item per line at `baseDepth + 1`, each
 * starting with the list item prefix.
 *
 * At most `header.length` items are read. In strict mode the final count must
 * equal `header.length`, and a further list item line at the same depth is
 * reported as an error.
 *
 * @param header - Supplies the declared length and the active delimiter.
 * @param cursor - Cursor positioned on the first line after the header.
 * @param baseDepth - Depth of the header line.
 * @param options - Resolved decode options.
 */
function decodeListArray(
  header: ArrayHeaderInfo,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonValue[] {
  const collected: JsonValue[] = []
  const itemDepth = baseDepth + 1

  while (!cursor.atEnd() && collected.length < header.length) {
    const candidate = cursor.peek()
    const isItem = candidate
      && candidate.depth === itemDepth
      && candidate.content.startsWith(LIST_ITEM_PREFIX)
    if (!isItem) {
      break
    }
    collected.push(decodeListItem(cursor, itemDepth, header.delimiter, options))
  }

  assertExpectedCount(collected.length, header.length, 'list array items', options)

  // Strict mode: the declared count was reached, so another item here is surplus
  if (options.strict && !cursor.atEnd()) {
    const trailing = cursor.peek()
    if (trailing && trailing.depth === itemDepth && trailing.content.startsWith(LIST_ITEM_PREFIX)) {
      throw new Error(`Expected ${header.length} list array items, but found more`)
    }
  }

  return collected
}
|
||||
|
||||
/**
 * Decodes a tabular array: each line at `baseDepth + 1` is one delimited row
 * whose cells map positionally onto `header.fields`.
 *
 * At most `header.length` rows are read. In strict mode every row must have
 * exactly as many cells as there are fields, the row count must equal
 * `header.length`, and a following line at row depth that still looks like a
 * data row is reported as an error.
 *
 * @param header - Supplies fields, delimiter and declared length; the caller
 *   only routes here when `header.fields` is non-empty.
 * @param cursor - Cursor positioned on the first line after the header.
 * @param baseDepth - Depth of the header line.
 * @param options - Resolved decode options.
 */
function decodeTabularArray(
  header: ArrayHeaderInfo,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonObject[] {
  const rows: JsonObject[] = []
  const rowDepth = baseDepth + 1
  const fields = header.fields!

  while (!cursor.atEnd() && rows.length < header.length) {
    const line = cursor.peek()
    if (!line || line.depth !== rowDepth) {
      break
    }

    cursor.advance()
    const cells = splitDelimitedValues(line.content, header.delimiter)
    assertExpectedCount(cells.length, fields.length, 'tabular row values', options)

    const primitives = parseRowValuesToPrimitives(cells)
    const row: JsonObject = {}
    for (let i = 0; i < fields.length; i++) {
      const field = fields[i]!
      const cell = primitives[i]!
      if (field === '__proto__') {
        // Plain assignment would go through the inherited `__proto__` setter and
        // lose the column; store it as an ordinary own property instead.
        Object.defineProperty(row, field, { value: cell, enumerable: true, writable: true, configurable: true })
      }
      else {
        row[field] = cell
      }
    }
    rows.push(row)
  }

  assertExpectedCount(rows.length, header.length, 'tabular rows', options)

  // Strict mode: decide whether the next same-depth line is a surplus data row
  // or a sibling key-value pair that belongs to the enclosing object.
  if (options.strict && !cursor.atEnd()) {
    const trailing = cursor.peek()
    if (trailing && trailing.depth === rowDepth && !trailing.content.startsWith(LIST_ITEM_PREFIX)) {
      const colonAt = trailing.content.indexOf(COLON)
      const delimiterAt = trailing.content.indexOf(header.delimiter)
      // No colon at all, or a delimiter ahead of the first colon => data row.
      // A colon first (or a colon with no delimiter) => key-value pair, which is fine.
      const looksLikeRow = colonAt === -1 || (delimiterAt !== -1 && delimiterAt < colonAt)
      if (looksLikeRow) {
        throw new Error(`Expected ${header.length} tabular rows, but found more`)
      }
    }
  }

  return rows
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region List item decoding
|
||||
|
||||
/**
 * Consumes one list item line and decodes what follows the list item prefix.
 *
 * The text after the prefix is treated as, in order: a nested array (when it
 * looks like and parses as an array header), an object (when it looks like an
 * object's first field), or a primitive.
 *
 * @param cursor - Cursor positioned on the list item line.
 * @param baseDepth - Depth of the list item line.
 * @param activeDelimiter - Delimiter of the enclosing array, used as the default for a nested header.
 * @param options - Resolved decode options.
 * @throws Error when the cursor yields no line.
 */
function decodeListItem(
  cursor: LineCursor,
  baseDepth: Depth,
  activeDelimiter: string,
  options: ResolvedDecodeOptions,
): JsonValue {
  const line = cursor.next()
  if (!line) {
    throw new Error('Expected list item')
  }

  const payload = line.content.slice(LIST_ITEM_PREFIX.length)

  const nestedHeader = isArrayHeaderAfterHyphen(payload)
    ? parseArrayHeaderLine(payload, activeDelimiter as Parameters<typeof parseArrayHeaderLine>[1])
    : undefined
  if (nestedHeader) {
    return decodeArrayFromHeader(nestedHeader.header, line, cursor, baseDepth, options)
  }

  return isObjectFirstFieldAfterHyphen(payload)
    ? decodeObjectFromListItem(line, cursor, baseDepth, options)
    : parsePrimitiveToken(payload)
}
|
||||
|
||||
/**
 * Decodes an object that starts on a list item line (`- key: value`).
 *
 * The first field is taken from the text after the list item prefix. Further
 * fields are read from following lines that sit at the depth reported by
 * `decodeFirstFieldOnHyphen` and do not themselves start a new list item.
 *
 * @param firstLine - The list item line, already consumed from the cursor by the caller.
 * @param cursor - Cursor positioned on the line after `firstLine`.
 * @param baseDepth - Depth of `firstLine`.
 * @param options - Resolved decode options.
 */
function decodeObjectFromListItem(
  firstLine: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonObject {
  const afterHyphen = firstLine.content.slice(LIST_ITEM_PREFIX.length)
  const { key, value, followDepth } = decodeFirstFieldOnHyphen(afterHyphen, cursor, baseDepth, options)

  // A computed key in an object literal always creates an own property, even for `__proto__`
  const obj: JsonObject = { [key]: value }

  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth !== followDepth || line.content.startsWith(LIST_ITEM_PREFIX)) {
      break
    }

    const [k, v] = decodeKeyValuePair(line, cursor, followDepth, options)
    if (k === '__proto__') {
      // Plain assignment would go through the inherited `__proto__` setter and
      // either drop the field or swap the object's prototype.
      Object.defineProperty(obj, k, { value: v, enumerable: true, writable: true, configurable: true })
    }
    else {
      obj[k] = v
    }
  }

  return obj
}
|
||||
|
||||
/**
 * Decodes the first field of an object that begins on a list item line.
 *
 * @param rest - Text of the list item line after the list item prefix.
 * @param cursor - Cursor positioned on the line after the list item line.
 * @param baseDepth - Depth of the list item line.
 * @param options - Resolved decode options; `indent` is used to build a stand-in line for array decoding.
 * @returns The field's key and value, plus the depth (`baseDepth + 1`) at
 *   which the object's remaining fields are expected.
 */
function decodeFirstFieldOnHyphen(
  rest: string,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): { key: string, value: JsonValue, followDepth: Depth } {
  const followDepth = baseDepth + 1

  const parsedHeader = parseArrayHeaderLine(rest, DEFAULT_DELIMITER)
  if (parsedHeader) {
    // decodeArrayFromHeader wants a line to re-parse, so build one from the hyphen-less text
    const headerLine: ParsedLine = {
      raw: rest,
      content: rest,
      indent: baseDepth * options.indent,
      depth: baseDepth,
    }
    const value = decodeArrayFromHeader(parsedHeader.header, headerLine, cursor, baseDepth, options)
    return { key: parsedHeader.header.key!, value, followDepth }
  }

  const { key, end } = parseKeyToken(rest, 0)
  const inlineText = rest.slice(end).trim()

  // Text after the colon is an inline primitive; none means a nested object follows
  const value = inlineText
    ? parsePrimitiveToken(inlineText)
    : expectNestedObject(cursor, followDepth, options)
  return { key, value, followDepth }
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Validation
|
||||
|
||||
/**
 * Strict-mode guard: throws when `actual` differs from `expected`.
 * Does nothing when `options.strict` is falsy.
 *
 * @param actual - Number of items that were read.
 * @param expected - Number of items that were declared.
 * @param what - Noun phrase used in the error message (e.g. `'tabular rows'`).
 * @param options - Resolved decode options; only `strict` is consulted.
 * @throws Error with message `Expected <expected> <what>, but got <actual>`.
 */
function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void {
  if (!options.strict) {
    return
  }
  if (actual === expected) {
    return
  }
  throw new Error(`Expected ${expected} ${what}, but got ${actual}`)
}
|
||||
|
||||
// #endregion
|
||||
26
src/index.ts
26
src/index.ts
@@ -1,13 +1,19 @@
|
||||
import type {
|
||||
DecodeOptions,
|
||||
EncodeOptions,
|
||||
JsonValue,
|
||||
ResolvedDecodeOptions,
|
||||
ResolvedEncodeOptions,
|
||||
} from './types'
|
||||
import { DEFAULT_DELIMITER } from './constants'
|
||||
import { decodeValueFromLines } from './decoders'
|
||||
import { encodeValue } from './encoders'
|
||||
import { normalizeValue } from './normalize'
|
||||
import { LineCursor, toParsedLines } from './scanner'
|
||||
|
||||
export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
|
||||
export type {
|
||||
DecodeOptions,
|
||||
Delimiter,
|
||||
DelimiterKey,
|
||||
EncodeOptions,
|
||||
@@ -15,6 +21,7 @@ export type {
|
||||
JsonObject,
|
||||
JsonPrimitive,
|
||||
JsonValue,
|
||||
ResolvedDecodeOptions,
|
||||
ResolvedEncodeOptions,
|
||||
} from './types'
|
||||
|
||||
@@ -24,6 +31,18 @@ export function encode(input: unknown, options?: EncodeOptions): string {
|
||||
return encodeValue(normalizedValue, resolvedOptions)
|
||||
}
|
||||
|
||||
/**
 * Converts a TOON-formatted string back into a JavaScript value.
 *
 * @param input - TOON text to parse.
 * @param options - Optional decode settings; omitted fields fall back to `indent: 2`, `strict: true`.
 * @returns The decoded value (object, array, or primitive).
 * @throws Error when the input yields no parsed lines.
 */
export function decode(input: string, options?: DecodeOptions): JsonValue {
  const resolvedOptions = resolveDecodeOptions(options)
  const parsedLines = toParsedLines(input, resolvedOptions.indent)

  if (parsedLines.length === 0) {
    throw new Error('Cannot decode empty input')
  }

  return decodeValueFromLines(new LineCursor(parsedLines), resolvedOptions)
}
|
||||
|
||||
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
||||
return {
|
||||
indent: options?.indent ?? 2,
|
||||
@@ -31,3 +50,10 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
||||
lengthMarker: options?.lengthMarker ?? false,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fills in decode defaults: `indent` falls back to 2 and `strict` to true
 * whenever the caller leaves them null or undefined.
 */
function resolveDecodeOptions(options?: DecodeOptions): ResolvedDecodeOptions {
  const indent = options?.indent ?? 2
  const strict = options?.strict ?? true
  return { indent, strict }
}
|
||||
|
||||
393
src/parser.ts
Normal file
393
src/parser.ts
Normal file
@@ -0,0 +1,393 @@
|
||||
import type {
|
||||
ArrayHeaderInfo,
|
||||
Delimiter,
|
||||
JsonPrimitive,
|
||||
} from './types'
|
||||
import {
|
||||
BACKSLASH,
|
||||
CARRIAGE_RETURN,
|
||||
CLOSE_BRACE,
|
||||
CLOSE_BRACKET,
|
||||
COLON,
|
||||
DELIMITERS,
|
||||
DOUBLE_QUOTE,
|
||||
FALSE_LITERAL,
|
||||
HASH,
|
||||
NEWLINE,
|
||||
NULL_LITERAL,
|
||||
OPEN_BRACE,
|
||||
OPEN_BRACKET,
|
||||
PIPE,
|
||||
TAB,
|
||||
TRUE_LITERAL,
|
||||
} from './constants'
|
||||
|
||||
// #region Array header parsing
|
||||
|
||||
/**
 * Tries to read `content` as an array header of the form
 * `key[N]{field,field}: inline,values` (key, fields and inline values optional).
 *
 * Returns `undefined` instead of throwing when the line is not a header:
 * - it starts with a double quote (treated as a quoted key, not an array),
 * - it has no `[`...`]` pair, or no colon after the bracket/brace segments,
 * - a colon appears before the `[` (the bracket then belongs to a value, as in
 *   `note: see [1]: details`, and must not be mistaken for a header),
 * - the bracket segment does not contain a valid length.
 *
 * @param content - Line text without indentation.
 * @param defaultDelimiter - Delimiter to assume when the bracket segment names none.
 * @returns The parsed header plus any non-empty text after the colon, or `undefined`.
 * @throws Error when a field name in the brace segment is a malformed quoted string.
 */
export function parseArrayHeaderLine(
  content: string,
  defaultDelimiter: Delimiter,
): { header: ArrayHeaderInfo, inlineValues?: string } | undefined {
  // A leading quote means a quoted key, which this parser does not treat as an array
  if (content.trimStart().startsWith(DOUBLE_QUOTE)) {
    return undefined
  }

  const bracketStart = content.indexOf(OPEN_BRACKET)
  if (bracketStart === -1) {
    return undefined
  }

  // Unquoted keys cannot contain a colon, so a colon ahead of the bracket means
  // the key already ended and the bracket is part of the value.
  const firstColon = content.indexOf(COLON)
  if (firstColon !== -1 && firstColon < bracketStart) {
    return undefined
  }

  const bracketEnd = content.indexOf(CLOSE_BRACKET, bracketStart)
  if (bracketEnd === -1) {
    return undefined
  }

  // Optional `{...}` fields segment: only counts when it opens before the first
  // colon that follows the bracket (otherwise the brace belongs to inline values).
  const braceStart = content.indexOf(OPEN_BRACE, bracketEnd)
  let braceEnd = -1
  if (braceStart !== -1 && braceStart < content.indexOf(COLON, bracketEnd)) {
    braceEnd = content.indexOf(CLOSE_BRACE, braceStart)
  }

  // The header colon is the first one after the bracket and (if present) brace segments
  const colonIndex = content.indexOf(COLON, braceEnd !== -1 ? braceEnd + 1 : bracketEnd + 1)
  if (colonIndex === -1) {
    return undefined
  }

  const key = bracketStart > 0 ? content.slice(0, bracketStart) : undefined
  const afterColon = content.slice(colonIndex + 1).trim()

  // An unparseable bracket segment means "not a header", not a hard error
  let parsedBracket
  try {
    parsedBracket = parseBracketSegment(content.slice(bracketStart + 1, bracketEnd), defaultDelimiter)
  }
  catch {
    return undefined
  }

  const { length, delimiter, hasLengthMarker } = parsedBracket

  const fields = braceEnd !== -1
    ? parseFieldsSegment(content.slice(braceStart + 1, braceEnd), delimiter)
    : undefined

  return {
    header: {
      key,
      length,
      delimiter,
      fields,
      hasLengthMarker,
    },
    inlineValues: afterColon || undefined,
  }
}
|
||||
|
||||
/**
 * Parses the text between `[` and `]` of an array header.
 *
 * Accepted shape: an optional leading length marker (`#`), a run of decimal
 * digits, and an optional trailing tab or pipe that selects the delimiter.
 *
 * @param seg - Bracket contents, without the brackets.
 * @param defaultDelimiter - Delimiter returned when no suffix is present.
 * @returns The declared length, the effective delimiter, and whether the length marker was present.
 * @throws TypeError when the length part is not made up solely of decimal
 *   digits (this rejects inputs such as `3abc`, `-1` or `1.5`).
 */
export function parseBracketSegment(
  seg: string,
  defaultDelimiter: Delimiter,
): { length: number, delimiter: Delimiter, hasLengthMarker: boolean } {
  let body = seg

  const hasLengthMarker = body.startsWith(HASH)
  if (hasLengthMarker) {
    body = body.slice(1)
  }

  // A trailing tab or pipe overrides the default delimiter
  let delimiter = defaultDelimiter
  if (body.endsWith(TAB)) {
    delimiter = DELIMITERS.tab
    body = body.slice(0, -1)
  }
  else if (body.endsWith(PIPE)) {
    delimiter = DELIMITERS.pipe
    body = body.slice(0, -1)
  }

  // parseInt alone would accept trailing garbage and signs, so validate the shape first
  if (!/^\d+$/.test(body)) {
    throw new TypeError(`Invalid array length: ${seg}`)
  }
  const length = Number.parseInt(body, 10)

  return { length, delimiter, hasLengthMarker }
}
|
||||
|
||||
/**
 * Splits the text between `{` and `}` of a tabular header into field names,
 * unquoting any name written as a string literal.
 */
export function parseFieldsSegment(seg: string, delimiter: Delimiter): string[] {
  const names: string[] = []
  for (const raw of splitDelimitedValues(seg, delimiter)) {
    names.push(parseStringLiteral(raw.trim()))
  }
  return names
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Delimited value parsing
|
||||
|
||||
/**
 * Splits a row on `delimiter`, ignoring delimiters inside double-quoted sections.
 *
 * Quotes and backslash escapes are kept in the output untouched, and each
 * piece is trimmed. An empty input yields `[]`; a trailing delimiter yields a
 * final empty string.
 *
 * @param input - Raw row text.
 * @param delimiter - Separator character between values.
 */
export function splitDelimitedValues(input: string, delimiter: Delimiter): string[] {
  const parts: string[] = []
  let buffer = ''
  let quoted = false

  for (let pos = 0; pos < input.length; pos++) {
    const ch = input[pos]

    if (quoted && ch === BACKSLASH && pos + 1 < input.length) {
      // Inside quotes a backslash carries the next character with it verbatim
      buffer += ch + input[pos + 1]
      pos++
    }
    else if (ch === DOUBLE_QUOTE) {
      quoted = !quoted
      buffer += ch
    }
    else if (!quoted && ch === delimiter) {
      parts.push(buffer.trim())
      buffer = ''
    }
    else {
      buffer += ch
    }
  }

  // Flush the last piece; skipped only when nothing at all was seen
  if (buffer || parts.length > 0) {
    parts.push(buffer.trim())
  }

  return parts
}
|
||||
|
||||
/**
 * Converts each raw cell of a row into its primitive value, in order.
 */
export function parseRowValuesToPrimitives(values: string[]): JsonPrimitive[] {
  const primitives: JsonPrimitive[] = []
  for (const raw of values) {
    primitives.push(parsePrimitiveToken(raw))
  }
  return primitives
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Primitive and key parsing
|
||||
|
||||
/**
 * Interprets one token as a primitive.
 *
 * After trimming: an empty token is `''`; a token starting with a double quote
 * must be a well-formed string literal; the boolean and null literals map to
 * `true`, `false` and `null`; a numeric literal becomes a number; anything
 * else is returned as an unquoted string.
 *
 * @throws Error when a token starting with a quote is not a valid string literal.
 */
export function parsePrimitiveToken(token: string): JsonPrimitive {
  const text = token.trim()

  if (text === '') {
    return ''
  }

  if (text.startsWith(DOUBLE_QUOTE)) {
    return parseStringLiteral(text)
  }

  switch (text) {
    case TRUE_LITERAL:
      return true
    case FALSE_LITERAL:
      return false
    case NULL_LITERAL:
      return null
  }

  return isNumericLiteral(text) ? Number.parseFloat(text) : text
}
|
||||
|
||||
/**
 * True when `token` is exactly one of the boolean or null literal keywords.
 */
export function isBooleanOrNullLiteral(token: string): boolean {
  switch (token) {
    case TRUE_LITERAL:
    case FALSE_LITERAL:
    case NULL_LITERAL:
      return true
    default:
      return false
  }
}
|
||||
|
||||
/**
 * Decides whether an unquoted token should be read as a number.
 *
 * A token qualifies when `Number(token)` is finite and its digits do not start
 * with a superfluous zero: `0` and `0.5` are fine, `05` is not. The
 * leading-zero rule is applied after an optional sign, so `-05` and `+05` are
 * rejected just like `05`, while `-0` and `-0.5` are accepted.
 *
 * @param token - Candidate token; callers pass it already trimmed.
 * @returns `true` when the token is a numeric literal.
 */
export function isNumericLiteral(token: string): boolean {
  if (!token) {
    return false
  }

  // Judge the leading-zero rule on the digits only, ignoring one optional sign
  const unsigned = token[0] === '-' || token[0] === '+' ? token.slice(1) : token
  if (unsigned.length > 1 && unsigned[0] === '0' && unsigned[1] !== '.') {
    return false
  }

  // Number() yields NaN for non-numeric text and ±Infinity on overflow; both are rejected
  return Number.isFinite(Number(token))
}
|
||||
|
||||
/**
 * Returns the string value of a token.
 *
 * A token that (after trimming) starts with a double quote must be a complete
 * quoted literal: its content is unescaped and returned. Any other token is
 * returned trimmed, as is.
 *
 * @throws Error when the closing quote is missing, when characters follow the
 *   closing quote, or when the content holds an invalid escape sequence.
 */
export function parseStringLiteral(token: string): string {
  const text = token.trim()

  if (!text.startsWith(DOUBLE_QUOTE)) {
    return text
  }

  // Locate the first unescaped quote after the opening one
  let closing = -1
  for (let pos = 1; pos < text.length; pos++) {
    if (text[pos] === BACKSLASH && pos + 1 < text.length) {
      pos++ // jump over the escaped character
      continue
    }
    if (text[pos] === DOUBLE_QUOTE) {
      closing = pos
      break
    }
  }

  if (closing === -1) {
    throw new Error('Unterminated string: missing closing quote')
  }
  if (closing !== text.length - 1) {
    throw new Error('Unexpected characters after closing quote')
  }

  return unescapeString(text.slice(1, closing))
}
|
||||
|
||||
/**
 * Resolves backslash escapes in the content of a quoted string.
 *
 * Recognised escapes are `\n`, `\t`, `\r`, `\\` and `\"`.
 *
 * @param value - String content without the surrounding quotes.
 * @throws Error on a trailing lone backslash or on any other escape sequence.
 */
export function unescapeString(value: string): string {
  let out = ''

  for (let pos = 0; pos < value.length; pos++) {
    const ch = value[pos]
    if (ch !== BACKSLASH) {
      out += ch
      continue
    }

    if (pos + 1 >= value.length) {
      throw new Error('Invalid escape sequence: backslash at end of string')
    }

    const code = value[pos + 1]
    switch (code) {
      case 'n':
        out += NEWLINE
        break
      case 't':
        out += TAB
        break
      case 'r':
        out += CARRIAGE_RETURN
        break
      case BACKSLASH:
        out += BACKSLASH
        break
      case DOUBLE_QUOTE:
        out += DOUBLE_QUOTE
        break
      default:
        throw new Error(`Invalid escape sequence: \\${code}`)
    }
    pos++ // the escape consumed two characters
  }

  return out
}
|
||||
|
||||
/**
 * Reads an unquoted key that runs from `start` up to the next colon.
 *
 * @param content - Line content being parsed.
 * @param start - Index at which the key begins.
 * @returns The trimmed key and the index just past the colon.
 * @throws Error when no colon exists at or after `start`.
 */
export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
  const colonIndex = content.indexOf(COLON, start)

  if (colonIndex === -1) {
    throw new Error('Missing colon after key')
  }

  return {
    key: content.slice(start, colonIndex).trim(),
    // Resume parsing right after the colon
    end: colonIndex + 1,
  }
}
|
||||
|
||||
/**
 * Reads a double-quoted key whose opening quote sits at `start`.
 *
 * The raw text between the quotes is unescaped with `unescapeString`, and the
 * closing quote must be followed immediately by a colon.
 *
 * @param content - Line content being parsed.
 * @param start - Index of the opening quote.
 * @returns The unescaped key and the index just past the colon.
 * @throws Error when the key is unterminated, contains an invalid escape, or
 *   is not directly followed by a colon.
 */
export function parseQuotedKey(content: string, start: number): { key: string, end: number } {
  const bodyStart = start + 1 // first character after the opening quote
  let pos = bodyStart

  while (pos < content.length) {
    const ch = content[pos]

    if (ch === BACKSLASH && pos + 1 < content.length) {
      // Escape pairs stay in the raw body and are resolved by unescapeString
      pos += 2
      continue
    }

    if (ch !== DOUBLE_QUOTE) {
      pos++
      continue
    }

    // Closing quote reached: everything in between is the raw key
    const key = unescapeString(content.slice(bodyStart, pos))

    const colonIndex = pos + 1
    if (content[colonIndex] !== COLON) {
      throw new Error('Missing colon after key')
    }

    return { key, end: colonIndex + 1 }
  }

  throw new Error('Unterminated quoted key')
}
|
||||
|
||||
/**
 * Parses the key that begins at `start`, delegating to the quoted or unquoted
 * key parser depending on whether the first character is a double quote.
 *
 * @param content - Line content being parsed.
 * @param start - Index at which the key begins.
 * @returns The key and the index just past its trailing colon.
 */
export function parseKeyToken(content: string, start: number): { key: string, end: number } {
  const parseKey = content[start] === DOUBLE_QUOTE ? parseQuotedKey : parseUnquotedKey
  return parseKey(content, start)
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Array content detection helpers
|
||||
|
||||
/**
 * Reports whether the text looks like an array header: after trimming it
 * starts with an opening bracket, and the text contains a colon somewhere.
 *
 * @param content - Text to classify.
 * @returns `true` when both conditions hold.
 */
export function isArrayHeaderAfterHyphen(content: string): boolean {
  const body = content.trim()

  if (!body.startsWith(OPEN_BRACKET)) {
    return false
  }

  return content.indexOf(COLON) !== -1
}
|
||||
|
||||
/**
 * Reports whether the text looks like a `key: value` field.
 *
 * For text that starts with a double quote, only a colon located after the
 * closing quote counts. This keeps a quoted string value that merely contains
 * a colon (for example `"a:b"`) from being mistaken for an object field.
 * Text that does not start with a quote, or whose leading quote is never
 * closed, falls back to a plain "contains a colon" check.
 *
 * @param content - Text to classify.
 * @returns `true` when the text should be parsed as an object field.
 */
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
  if (!content.startsWith(DOUBLE_QUOTE)) {
    return content.includes(COLON)
  }

  // Find the quote that closes the leading token, stepping over escape pairs
  let pos = 1
  while (pos < content.length) {
    const ch = content[pos]

    if (ch === BACKSLASH) {
      pos += 2
      continue
    }

    if (ch === DOUBLE_QUOTE) {
      // Colons inside the quoted token are data, not a key separator
      return content.includes(COLON, pos + 1)
    }

    pos++
  }

  // Unterminated quote: keep the loose check; later parsing reports the error
  return content.includes(COLON)
}
|
||||
|
||||
// #endregion
|
||||
63
src/scanner.ts
Normal file
63
src/scanner.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import type { Depth, ParsedLine } from './types'
|
||||
import { SPACE } from './constants'
|
||||
|
||||
/**
 * Forward-only cursor over a list of parsed lines.
 *
 * The cursor keeps an index of the next unread line; `peek` inspects it,
 * `next`/`advance` move past it, and `current` returns the line that was read
 * most recently.
 */
export class LineCursor {
  private lines: ParsedLine[]
  private index = 0

  constructor(lines: ParsedLine[]) {
    this.lines = lines
  }

  /** Returns the next unread line without consuming it, or `undefined` past the end. */
  peek(): ParsedLine | undefined {
    const upcoming = this.lines[this.index]
    return upcoming
  }

  /** Returns the next unread line and moves the cursor one position forward. */
  next(): ParsedLine | undefined {
    const line = this.lines[this.index]
    this.index += 1
    return line
  }

  /** Returns the line just before the cursor, or `undefined` if nothing has been consumed. */
  current(): ParsedLine | undefined {
    if (this.index === 0) {
      return undefined
    }
    return this.lines[this.index - 1]
  }

  /** Moves the cursor one position forward without returning a line. */
  advance(): void {
    this.index += 1
  }

  /** Reports whether the cursor has moved past the last line. */
  atEnd(): boolean {
    return !(this.index < this.lines.length)
  }

  /** Total number of lines held by the cursor. */
  get length(): number {
    return this.lines.length
  }
}
|
||||
|
||||
/**
 * Splits source text into per-line records carrying indentation metadata.
 *
 * Lines are separated on `\n`. Blank and whitespace-only lines are skipped:
 * they carry no structure, and emitting them (for example the empty string
 * left after a trailing newline) would hand a depth-0 line with empty content
 * to the parsers. Input that contains only whitespace therefore yields an
 * empty array.
 *
 * @param source - Full text to scan.
 * @param indentSize - Number of spaces that make up one depth level.
 * @returns One record per non-blank line, in source order.
 */
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
  const parsed: ParsedLine[] = []

  for (const raw of source.split('\n')) {
    if (!raw.trim()) {
      continue
    }

    // Only leading space characters count towards indentation
    let indent = 0
    while (indent < raw.length && raw[indent] === SPACE) {
      indent++
    }

    const content = raw.slice(indent)
    const depth = computeDepthFromIndent(indent, indentSize)

    parsed.push({ raw, indent, content, depth })
  }

  return parsed
}
|
||||
|
||||
/**
 * Converts a count of leading spaces into a nesting depth.
 *
 * @param indentSpaces - Number of leading spaces on the line.
 * @param indentSize - Number of spaces per depth level.
 * @returns The number of whole indentation levels (partial levels round down).
 */
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
  const levels = indentSpaces / indentSize
  return Math.floor(levels)
}
|
||||
38
src/types.ts
38
src/types.ts
@@ -36,4 +36,42 @@ export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>>
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Decoder options
|
||||
|
||||
/**
 * Options accepted when decoding. Every field is optional; the `@default`
 * tags record the documented fallback for each one.
 */
export interface DecodeOptions {
  /**
   * Number of spaces per indentation level.
   * @default 2
   */
  indent?: number
  /**
   * When true, enforce strict validation of array lengths and tabular row counts.
   * @default true
   */
  strict?: boolean
}
|
||||
|
||||
/** `DecodeOptions` with every field required and read-only. */
export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Decoder parsing types
|
||||
|
||||
/**
 * Pieces of an array header such as `items[2]{sku,qty}:`.
 *
 * NOTE(review): the field meanings below are inferred from the field names and
 * from the header syntax exercised in the decoder tests — confirm against the
 * header parser.
 */
export interface ArrayHeaderInfo {
  // Presumably the key before the bracket; optional, likely absent for headers like `[2]:`
  key?: string
  // Presumably the element count declared inside the brackets
  length: number
  // Presumably the delimiter that separates values (and header fields) for this array
  delimiter: Delimiter
  // Presumably the field names listed in `{...}`; optional, likely absent for non-tabular arrays
  fields?: string[]
  // Presumably true when the declared length is written with a `#` prefix, e.g. `[#3]`
  hasLengthMarker: boolean
}
|
||||
|
||||
/**
 * One source line together with its indentation metadata, as produced by
 * `toParsedLines` in the scanner.
 */
export interface ParsedLine {
  // The line exactly as obtained by splitting the source on '\n'
  raw: string
  // Nesting level: whole number of `indentSize` units contained in `indent`
  depth: Depth
  // Number of leading space characters in `raw`
  indent: number
  // `raw` with the leading spaces removed
  content: string
}
|
||||
|
||||
// #endregion
|
||||
|
||||
/** Nesting level of a line; the scanner computes it as floor(indent / indentSize). */
export type Depth = number
|
||||
|
||||
494
test/decode.test.ts
Normal file
494
test/decode.test.ts
Normal file
@@ -0,0 +1,494 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { decode } from '../src/index'
|
||||
|
||||
// Decoding of single root-level primitive values (no keys, no arrays).
describe('primitives', () => {
  it('decodes safe unquoted strings', () => {
    expect(decode('hello')).toBe('hello')
    expect(decode('Ada_99')).toBe('Ada_99')
  })

  // Inputs are JS string literals, so `\\n` below is a backslash + `n` in the decoder input
  it('decodes quoted strings and unescapes control characters', () => {
    expect(decode('""')).toBe('')
    expect(decode('"line1\\nline2"')).toBe('line1\nline2')
    expect(decode('"tab\\there"')).toBe('tab\there')
    expect(decode('"return\\rcarriage"')).toBe('return\rcarriage')
    expect(decode('"C:\\\\Users\\\\path"')).toBe('C:\\Users\\path')
    expect(decode('"say \\"hello\\""')).toBe('say "hello"')
  })

  it('decodes unicode and emoji', () => {
    expect(decode('café')).toBe('café')
    expect(decode('你好')).toBe('你好')
    expect(decode('🚀')).toBe('🚀')
    expect(decode('hello 👋 world')).toBe('hello 👋 world')
  })

  it('decodes numbers, booleans and null', () => {
    expect(decode('42')).toBe(42)
    expect(decode('3.14')).toBe(3.14)
    expect(decode('-7')).toBe(-7)
    expect(decode('true')).toBe(true)
    expect(decode('false')).toBe(false)
    expect(decode('null')).toBe(null)
  })

  // Quoting a token that would otherwise be a number/boolean/null keeps it a string
  it('respects ambiguity quoting (quoted primitives remain strings)', () => {
    expect(decode('"true"')).toBe('true')
    expect(decode('"false"')).toBe('false')
    expect(decode('"null"')).toBe('null')
    expect(decode('"42"')).toBe('42')
    expect(decode('"-3.14"')).toBe('-3.14')
    expect(decode('"1e-6"')).toBe('1e-6')
    expect(decode('"05"')).toBe('05')
  })
})
|
||||
|
||||
// Flat `key: value` objects with primitive and quoted values.
describe('objects (simple)', () => {
  it('parses objects with primitive values', () => {
    const toon = 'id: 123\nname: Ada\nactive: true'
    expect(decode(toon)).toEqual({ id: 123, name: 'Ada', active: true })
  })

  it('parses null values in objects', () => {
    const toon = 'id: 123\nvalue: null'
    expect(decode(toon)).toEqual({ id: 123, value: null })
  })

  // A key with nothing after the colon and no nested lines decodes to an empty object
  it('parses empty nested object header', () => {
    expect(decode('user:')).toEqual({ user: {} })
  })

  it('parses quoted object values with special characters and escapes', () => {
    expect(decode('note: "a:b"')).toEqual({ note: 'a:b' })
    expect(decode('note: "a,b"')).toEqual({ note: 'a,b' })
    expect(decode('text: "line1\\nline2"')).toEqual({ text: 'line1\nline2' })
    expect(decode('text: "say \\"hello\\""')).toEqual({ text: 'say "hello"' })
    expect(decode('text: " padded "')).toEqual({ text: ' padded ' })
    expect(decode('text: " "')).toEqual({ text: ' ' })
    expect(decode('v: "true"')).toEqual({ v: 'true' })
    expect(decode('v: "42"')).toEqual({ v: '42' })
    expect(decode('v: "-7.5"')).toEqual({ v: '-7.5' })
  })
})
|
||||
|
||||
// Key parsing: quoted keys, identifier-like unquoted keys, and escapes inside keys.
describe('objects (keys)', () => {
  it('parses quoted keys with special characters and escapes', () => {
    expect(decode('"order:id": 7')).toEqual({ 'order:id': 7 })
    expect(decode('"[index]": 5')).toEqual({ '[index]': 5 })
    expect(decode('"{key}": 5')).toEqual({ '{key}': 5 })
    expect(decode('"a,b": 1')).toEqual({ 'a,b': 1 })
    expect(decode('"full name": Ada')).toEqual({ 'full name': 'Ada' })
    expect(decode('"-lead": 1')).toEqual({ '-lead': 1 })
    expect(decode('" a ": 1')).toEqual({ ' a ': 1 })
    expect(decode('"123": x')).toEqual({ 123: 'x' })
    expect(decode('"": 1')).toEqual({ '': 1 })
  })

  // Dots and underscores are part of the key; no nested object is created from `user.name`
  it('parses dotted keys as identifiers', () => {
    expect(decode('user.name: Ada')).toEqual({ 'user.name': 'Ada' })
    expect(decode('_private: 1')).toEqual({ _private: 1 })
    expect(decode('user_name: 1')).toEqual({ user_name: 1 })
  })

  it('unescapes control characters and quotes in keys', () => {
    expect(decode('"line\\nbreak": 1')).toEqual({ 'line\nbreak': 1 })
    expect(decode('"tab\\there": 2')).toEqual({ 'tab\there': 2 })
    expect(decode('"he said \\"hi\\"": 1')).toEqual({ 'he said "hi"': 1 })
  })
})
|
||||
|
||||
// Objects nested through indentation.
// NOTE(review): the fixture below shows a single space of indentation per level, while the
// documented default is 2 spaces per level — confirm the fixture whitespace is intact.
describe('nested objects', () => {
  it('parses deeply nested objects with indentation', () => {
    const toon = 'a:\n b:\n c: deep'
    expect(decode(toon)).toEqual({ a: { b: { c: 'deep' } } })
  })
})
|
||||
|
||||
// Inline arrays of primitives written as `key[N]: v1,v2,...`.
describe('arrays of primitives', () => {
  it('parses string arrays inline', () => {
    const toon = 'tags[3]: reading,gaming,coding'
    expect(decode(toon)).toEqual({ tags: ['reading', 'gaming', 'coding'] })
  })

  it('parses number arrays inline', () => {
    const toon = 'nums[3]: 1,2,3'
    expect(decode(toon)).toEqual({ nums: [1, 2, 3] })
  })

  it('parses mixed primitive arrays inline', () => {
    const toon = 'data[4]: x,y,true,10'
    expect(decode(toon)).toEqual({ data: ['x', 'y', true, 10] })
  })

  it('parses empty arrays', () => {
    expect(decode('items[0]:')).toEqual({ items: [] })
  })

  it('parses quoted strings in arrays including empty and whitespace-only', () => {
    expect(decode('items[1]: ""')).toEqual({ items: [''] })
    expect(decode('items[3]: a,"",b')).toEqual({ items: ['a', '', 'b'] })
    expect(decode('items[2]: " "," "')).toEqual({ items: [' ', ' '] })
  })

  // Quoted values may contain the delimiter, colons, or tokens that look structural
  it('parses strings with delimiters and structural tokens in arrays', () => {
    expect(decode('items[3]: a,"b,c","d:e"')).toEqual({ items: ['a', 'b,c', 'd:e'] })
    expect(decode('items[4]: x,"true","42","-3.14"')).toEqual({ items: ['x', 'true', '42', '-3.14'] })
    expect(decode('items[3]: "[5]","- item","{key}"')).toEqual({ items: ['[5]', '- item', '{key}'] })
  })
})
|
||||
|
||||
// Arrays of objects in both encodings: tabular (`key[N]{fields}:` + rows) and
// list form (`key[N]:` + `- ` items).
// NOTE(review): indentation inside the multi-line fixtures appears as a single space in this
// view — confirm the fixture whitespace is intact.
describe('arrays of objects (tabular and list items)', () => {
  it('parses tabular arrays of uniform objects', () => {
    const toon = 'items[2]{sku,qty,price}:\n A1,2,9.99\n B2,1,14.5'
    expect(decode(toon)).toEqual({
      items: [
        { sku: 'A1', qty: 2, price: 9.99 },
        { sku: 'B2', qty: 1, price: 14.5 },
      ],
    })
  })

  it('parses nulls and quoted values in tabular rows', () => {
    const toon = 'items[2]{id,value}:\n 1,null\n 2,"test"'
    expect(decode(toon)).toEqual({
      items: [
        { id: 1, value: null },
        { id: 2, value: 'test' },
      ],
    })
  })

  it('parses quoted header keys in tabular arrays', () => {
    const toon = 'items[2]{"order:id","full name"}:\n 1,Ada\n 2,Bob'
    expect(decode(toon)).toEqual({
      items: [
        { 'order:id': 1, 'full name': 'Ada' },
        { 'order:id': 2, 'full name': 'Bob' },
      ],
    })
  })

  it('parses list arrays for non-uniform objects', () => {
    const toon
      = 'items[2]:\n'
        + ' - id: 1\n'
        + ' name: First\n'
        + ' - id: 2\n'
        + ' name: Second\n'
        + ' extra: true'
    expect(decode(toon)).toEqual({
      items: [
        { id: 1, name: 'First' },
        { id: 2, name: 'Second', extra: true },
      ],
    })
  })

  it('parses objects with nested values inside list items', () => {
    const toon
      = 'items[1]:\n'
        + ' - id: 1\n'
        + ' nested:\n'
        + ' x: 1'
    expect(decode(toon)).toEqual({
      items: [{ id: 1, nested: { x: 1 } }],
    })
  })

  // The first field of a list item may itself be a tabular array header
  it('parses nested tabular arrays as first field on hyphen line', () => {
    const toon
      = 'items[1]:\n'
        + ' - users[2]{id,name}:\n'
        + ' 1,Ada\n'
        + ' 2,Bob\n'
        + ' status: active'
    expect(decode(toon)).toEqual({
      items: [
        {
          users: [
            { id: 1, name: 'Ada' },
            { id: 2, name: 'Bob' },
          ],
          status: 'active',
        },
      ],
    })
  })

  it('parses objects containing arrays (including empty arrays) in list format', () => {
    const toon
      = 'items[1]:\n'
        + ' - name: test\n'
        + ' data[0]:'
    expect(decode(toon)).toEqual({
      items: [{ name: 'test', data: [] }],
    })
  })

  it('parses arrays of arrays within objects', () => {
    const toon
      = 'items[1]:\n'
        + ' - matrix[2]:\n'
        + ' - [2]: 1,2\n'
        + ' - [2]: 3,4\n'
        + ' name: grid'
    expect(decode(toon)).toEqual({
      items: [{ matrix: [[1, 2], [3, 4]], name: 'grid' }],
    })
  })
})
|
||||
|
||||
// Arrays whose items are themselves inline primitive arrays (`- [N]: ...`).
describe('arrays of arrays (primitives only)', () => {
  it('parses nested arrays of primitives', () => {
    const toon = 'pairs[2]:\n - [2]: a,b\n - [2]: c,d'
    expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] })
  })

  it('parses quoted strings and mixed lengths in nested arrays', () => {
    const toon = 'pairs[2]:\n - [2]: a,b\n - [3]: "c,d","e:f","true"'
    expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c,d', 'e:f', 'true']] })
  })

  it('parses empty inner arrays', () => {
    const toon = 'pairs[2]:\n - [0]:\n - [0]:'
    expect(decode(toon)).toEqual({ pairs: [[], []] })
  })

  it('parses mixed-length inner arrays', () => {
    const toon = 'pairs[2]:\n - [1]: 1\n - [2]: 2,3'
    expect(decode(toon)).toEqual({ pairs: [[1], [2, 3]] })
  })
})
|
||||
|
||||
// Documents whose root value is an array: the header has no key before the bracket.
describe('root arrays', () => {
  it('parses root arrays of primitives (inline)', () => {
    const toon = '[5]: x,y,"true",true,10'
    expect(decode(toon)).toEqual(['x', 'y', 'true', true, 10])
  })

  it('parses root arrays of uniform objects in tabular format', () => {
    const toon = '[2]{id}:\n 1\n 2'
    expect(decode(toon)).toEqual([{ id: 1 }, { id: 2 }])
  })

  it('parses root arrays of non-uniform objects in list format', () => {
    const toon = '[2]:\n - id: 1\n - id: 2\n name: Ada'
    expect(decode(toon)).toEqual([{ id: 1 }, { id: 2, name: 'Ada' }])
  })

  it('parses empty root arrays', () => {
    expect(decode('[0]:')).toEqual([])
  })

  it('parses root arrays of arrays', () => {
    const toon = '[2]:\n - [2]: 1,2\n - [0]:'
    expect(decode(toon)).toEqual([[1, 2], []])
  })
})
|
||||
|
||||
// A nested object combining primitives, an inline array, and an empty array.
describe('complex structures', () => {
  it('parses mixed objects with arrays and nested objects', () => {
    const toon
      = 'user:\n'
        + ' id: 123\n'
        + ' name: Ada\n'
        + ' tags[2]: reading,gaming\n'
        + ' active: true\n'
        + ' prefs[0]:'
    expect(decode(toon)).toEqual({
      user: {
        id: 123,
        name: 'Ada',
        tags: ['reading', 'gaming'],
        active: true,
        prefs: [],
      },
    })
  })
})
|
||||
|
||||
// List-form arrays whose items are of different kinds (primitive, object, array).
describe('mixed arrays', () => {
  it('parses arrays mixing primitives, objects and strings (list format)', () => {
    const toon
      = 'items[3]:\n'
        + ' - 1\n'
        + ' - a: 1\n'
        + ' - text'
    expect(decode(toon)).toEqual({ items: [1, { a: 1 }, 'text'] })
  })

  it('parses arrays mixing objects and arrays', () => {
    const toon
      = 'items[2]:\n'
        + ' - a: 1\n'
        + ' - [2]: 1,2'
    expect(decode(toon)).toEqual({ items: [{ a: 1 }, [1, 2]] })
  })
})
|
||||
|
||||
// Arrays that declare a tab or pipe delimiter inside the header brackets
// (`[3\t]`, `[3|]`); a header without one (`[3]`) uses commas.
describe('delimiter options', () => {
  describe('basic delimiter usage', () => {
    // `delimiter` and `name` in each row only feed the test title; the body uses header/joined
    it.each([
      { delimiter: '\t' as const, name: 'tab', header: '[3\t]', joined: 'reading\tgaming\tcoding' },
      { delimiter: '|' as const, name: 'pipe', header: '[3|]', joined: 'reading|gaming|coding' },
      { delimiter: ',' as const, name: 'comma', header: '[3]', joined: 'reading,gaming,coding' },
    ])('parses primitive arrays with $name delimiter', ({ header, joined }) => {
      const toon = `tags${header}: ${joined}`
      expect(decode(toon)).toEqual({ tags: ['reading', 'gaming', 'coding'] })
    })

    // In tabular headers the field list is split with the same delimiter as the rows
    it.each([
      { delimiter: '\t' as const, name: 'tab', header: '[2\t]{sku\tqty\tprice}', rows: ['A1\t2\t9.99', 'B2\t1\t14.5'] },
      { delimiter: '|' as const, name: 'pipe', header: '[2|]{sku|qty|price}', rows: ['A1|2|9.99', 'B2|1|14.5'] },
    ])('parses tabular arrays with $name delimiter', ({ header, rows }) => {
      const toon = `items${header}:\n ${rows[0]}\n ${rows[1]}`
      expect(decode(toon)).toEqual({
        items: [
          { sku: 'A1', qty: 2, price: 9.99 },
          { sku: 'B2', qty: 1, price: 14.5 },
        ],
      })
    })

    it.each([
      { header: '[2\t]', inner: '[2\t]', a: 'a\tb', b: 'c\td' },
      { header: '[2|]', inner: '[2|]', a: 'a|b', b: 'c|d' },
    ])('parses nested arrays with custom delimiters', ({ header, inner, a, b }) => {
      const toon = `pairs${header}:\n - ${inner}: ${a}\n - ${inner}: ${b}`
      expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] })
    })

    it.each([
      { header: '[3\t]', joined: 'x\ty\tz' },
      { header: '[3|]', joined: 'x|y|z' },
    ])('parses root arrays of primitives with custom delimiters', ({ header, joined }) => {
      const toon = `${header}: ${joined}`
      expect(decode(toon)).toEqual(['x', 'y', 'z'])
    })

    it.each([
      { header: '[2\t]{id}', rows: ['1', '2'] },
      { header: '[2|]{id}', rows: ['1', '2'] },
    ])('parses root arrays of objects with custom delimiters', ({ header, rows }) => {
      const toon = `${header}:\n ${rows[0]}\n ${rows[1]}`
      expect(decode(toon)).toEqual([{ id: 1 }, { id: 2 }])
    })
  })

  describe('delimiter-aware quoting', () => {
    // In the tab case the quoted value carries an escaped `\t` (backslash + t), not a raw tab
    it.each([
      { header: '[3\t]', joined: 'a\t"b\\tc"\td', expected: ['a', 'b\tc', 'd'] },
      { header: '[3|]', joined: 'a|"b|c"|d', expected: ['a', 'b|c', 'd'] },
    ])('parses values containing the active delimiter when quoted', ({ header, joined, expected }) => {
      const toon = `items${header}: ${joined}`
      expect(decode(toon)).toEqual({ items: expected })
    })

    it.each([
      { header: '[2\t]', joined: 'a,b\tc,d' },
      { header: '[2|]', joined: 'a,b|c,d' },
    ])('does not split on commas when using non-comma delimiter', ({ header, joined }) => {
      const toon = `items${header}: ${joined}`
      expect(decode(toon)).toEqual({ items: ['a,b', 'c,d'] })
    })

    it('parses tabular values containing the active delimiter correctly', () => {
      const comma = 'items[2]{id,note}:\n 1,"a,b"\n 2,"c,d"'
      expect(decode(comma)).toEqual({ items: [{ id: 1, note: 'a,b' }, { id: 2, note: 'c,d' }] })

      // With a tab delimiter the commas need no quoting
      const tab = 'items[2\t]{id\tnote}:\n 1\ta,b\n 2\tc,d'
      expect(decode(tab)).toEqual({ items: [{ id: 1, note: 'a,b' }, { id: 2, note: 'c,d' }] })
    })

    it('does not require quoting commas in object values when using non-comma delimiter elsewhere', () => {
      expect(decode('note: a,b')).toEqual({ note: 'a,b' })
    })

    it('parses nested array values containing the active delimiter', () => {
      expect(decode('pairs[1|]:\n - [2|]: a|"b|c"')).toEqual({ pairs: [['a', 'b|c']] })
      expect(decode('pairs[1\t]:\n - [2\t]: a\t"b\\tc"')).toEqual({ pairs: [['a', 'b\tc']] })
    })
  })

  describe('delimiter-independent quoting rules', () => {
    it('preserves quoted ambiguity regardless of delimiter', () => {
      expect(decode('items[3|]: "true"|"42"|"-3.14"')).toEqual({ items: ['true', '42', '-3.14'] })
      expect(decode('items[3\t]: "true"\t"42"\t"-3.14"')).toEqual({ items: ['true', '42', '-3.14'] })
    })

    it('parses structural-looking strings when quoted', () => {
      expect(decode('items[3|]: "[5]"|"{key}"|"- item"')).toEqual({ items: ['[5]', '{key}', '- item'] })
      expect(decode('items[3\t]: "[5]"\t"{key}"\t"- item"')).toEqual({ items: ['[5]', '{key}', '- item'] })
    })

    it('parses tabular headers with keys containing the active delimiter', () => {
      const toon = 'items[2|]{"a|b"}:\n 1\n 2'
      expect(decode(toon)).toEqual({ items: [{ 'a|b': 1 }, { 'a|b': 2 }] })
    })
  })
})
|
||||
|
||||
// Headers that prefix the declared length with `#` (e.g. `[#3]`) decode the same as plain ones.
describe('length marker option', () => {
  it('accepts length marker on primitive arrays', () => {
    expect(decode('tags[#3]: reading,gaming,coding')).toEqual({ tags: ['reading', 'gaming', 'coding'] })
  })

  it('accepts length marker on empty arrays', () => {
    expect(decode('items[#0]:')).toEqual({ items: [] })
  })

  it('accepts length marker on tabular arrays', () => {
    const toon = 'items[#2]{sku,qty,price}:\n A1,2,9.99\n B2,1,14.5'
    expect(decode(toon)).toEqual({
      items: [
        { sku: 'A1', qty: 2, price: 9.99 },
        { sku: 'B2', qty: 1, price: 14.5 },
      ],
    })
  })

  it('accepts length marker on nested arrays', () => {
    const toon = 'pairs[#2]:\n - [#2]: a,b\n - [#2]: c,d'
    expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] })
  })

  // The marker precedes the number; the delimiter follows it inside the same brackets
  it('works with custom delimiters and length marker', () => {
    expect(decode('tags[#3|]: reading|gaming|coding')).toEqual({ tags: ['reading', 'gaming', 'coding'] })
  })
})
|
||||
|
||||
// Malformed input must make decode() throw (called with default options).
// Only the fact that it throws is asserted, not the message.
describe('error handling', () => {
  it('throws on array length mismatch (inline primitives)', () => {
    const toon = 'tags[2]: a,b,c'
    expect(() => decode(toon)).toThrow()
  })

  it('throws on array length mismatch (list format)', () => {
    const toon = 'items[1]:\n - 1\n - 2'
    expect(() => decode(toon)).toThrow()
  })

  it('throws when tabular row value count does not match header field count', () => {
    const toon = 'items[2]{id,name}:\n 1,Ada\n 2'
    expect(() => decode(toon)).toThrow()
  })

  it('throws when tabular row count does not match header length', () => {
    const toon = '[1]{id}:\n 1\n 2'
    expect(() => decode(toon)).toThrow()
  })

  // Covers both an unsupported escape (`\x`) and a missing closing quote
  it('throws on invalid escape sequences', () => {
    expect(() => decode('"a\\x"')).toThrow()
    expect(() => decode('"unterminated')).toThrow()
  })

  it('throws on missing colon in key-value context', () => {
    expect(() => decode('a:\n user')).toThrow()
  })

  // Header declares a tab delimiter but the rows are comma-separated
  it('throws on delimiter mismatch', () => {
    const toon = 'items[2\t]{a\tb}:\n 1,2\n 3,4'
    expect(() => decode(toon)).toThrow()
  })
})
|
||||
Reference in New Issue
Block a user