mirror of https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
feat: decode method (#10)
84 README.md
@@ -50,21 +50,25 @@ users[2]{id,name,role}:
 
 ```
 ⭐ GitHub Repositories ██████████████░░░░░░░░░░░ 8,745 tokens
-vs JSON: 15,145 💰 42.3% saved
-vs XML: 17,095 💰 48.8% saved
+vs JSON: 15,145 (-42.3%)
+vs YAML: 13,129 (-33.4%)
+vs XML: 17,095 (-48.8%)
 
 📈 Daily Analytics ██████████░░░░░░░░░░░░░░░ 4,507 tokens
-vs JSON: 10,977 💰 58.9% saved
-vs XML: 13,128 💰 65.7% saved
+vs JSON: 10,977 (-58.9%)
+vs YAML: 8,810 (-48.8%)
+vs XML: 13,128 (-65.7%)
 
 🛒 E-Commerce Order ████████████████░░░░░░░░░ 166 tokens
-vs JSON: 257 💰 35.4% saved
-vs XML: 271 💰 38.7% saved
+vs JSON: 257 (-35.4%)
+vs YAML: 197 (-15.7%)
+vs XML: 271 (-38.7%)
 
 ─────────────────────────────────────────────────────────────────────
-Total ████████████░░░░░░░░░░░░░ 13,418 tokens
-vs JSON: 26,379 💰 49.1% saved
-vs XML: 30,494 💰 56.0% saved
+Total █████████████░░░░░░░░░░░░ 13,418 tokens
+vs JSON: 26,379 (-49.1%)
+vs YAML: 22,136 (-39.4%)
+vs XML: 30,494 (-56.0%)
 ```
 
 <details>
@@ -371,7 +375,7 @@ Four datasets designed to test different structural patterns:
 
 #### Evaluation Process
 
-1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
+1. **Format conversion**: Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
 3. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`).
 
@@ -764,6 +768,48 @@ encode(data, { lengthMarker: '#', delimiter: '|' })
 // B2|1|14.5
 ```
 
+### `decode(input: string, options?: DecodeOptions): JsonValue`
+
+Converts a TOON-formatted string back to JavaScript values.
+
+**Parameters:**
+
+- `input` – A TOON-formatted string to parse
+- `options` – Optional decoding options:
+  - `indent?: number` – Expected number of spaces per indentation level (default: `2`)
+  - `strict?: boolean` – Enable strict validation (default: `true`)
+
+**Returns:**
+
+A JavaScript value (object, array, or primitive) representing the parsed TOON data.
+
+**Example:**
+
+```ts
+import { decode } from '@byjohann/toon'
+
+const toon = `items[2]{sku,qty,price}:
+  A1,2,9.99
+  B2,1,14.5`
+
+const data = decode(toon)
+// {
+//   items: [
+//     { sku: 'A1', qty: 2, price: 9.99 },
+//     { sku: 'B2', qty: 1, price: 14.5 }
+//   ]
+// }
+```
+
+**Strict Mode:**
+
+By default, the decoder validates input strictly:
+
+- **Invalid escape sequences** – Throws on `"\x"`, unterminated strings
+- **Syntax errors** – Throws on missing colons, malformed headers
+- **Array length mismatches** – Throws when declared length doesn't match actual count
+- **Delimiter mismatches** – Throws when row delimiters don't match header
+
 ## Notes and Limitations
 
 - Format familiarity matters as much as token count. TOON's tabular format requires arrays of objects with identical keys and primitive values only – when this doesn't hold (due to mixed types, non-uniform objects, or nested structures), TOON switches to list format where JSON can be cheaper at scale.
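The `strict` option added above is easiest to see on a length mismatch. A minimal sketch (the input, variable names, and expected message below are assumptions based on this diff, not part of the README itself):

```ts
import { decode } from '@byjohann/toon'

// Header declares three rows, but only two follow.
const mismatched = `items[3]{id}:
  1
  2`

try {
  decode(mismatched) // strict mode is the default
}
catch (error) {
  console.error((error as Error).message) // "Expected 3 tabular rows, but got 2"
}

// With strict: false the count check is skipped and the two parsed rows are returned.
console.log(decode(mismatched, { strict: false }))
// { items: [ { id: 1 }, { id: 2 } ] }
```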
@@ -785,7 +831,7 @@ Wrap your encoded data in a fenced code block (label it \`\`\`toon for clarity).
 For output, be more explicit. When you want the model to **generate** TOON:
 
 - **Show the expected header** (`users[N]{id,name,role}:`). The model fills rows instead of repeating keys, reducing generation errors.
-- **State the rules**: 2-space indent, no trailing spaces, `[N]` matches row count.
+- **State the rules:** 2-space indent, no trailing spaces, `[N]` matches row count.
 
 Here's a prompt that works for both reading and generating:
 
@@ -850,16 +896,16 @@ Task: Return only users with role "user" as TOON. Use the same header. Set [N] t
 
 ## Ports in Other Languages
 
-- **Elixir**: [toon_ex](https://github.com/kentaro/toon_ex)
+- **Elixir:** [toon_ex](https://github.com/kentaro/toon_ex)
-- **PHP**: [toon-php](https://github.com/HelgeSverre/toon-php)
+- **PHP:** [toon-php](https://github.com/HelgeSverre/toon-php)
-- **Python**: [pytoon](https://github.com/bpradana/pytoon)
+- **Python:** [pytoon](https://github.com/bpradana/pytoon)
   - [python-toon](https://github.com/xaviviro/python-toon)
   - [toon-python](https://gitlab.com/KanTakahiro/toon-python)
-- **Ruby**: [toon-ruby](https://github.com/andrepcg/toon-ruby)
+- **Ruby:** [toon-ruby](https://github.com/andrepcg/toon-ruby)
-- **Java**: [JToon](https://github.com/felipestanzani/JToon)
+- **Java:** [JToon](https://github.com/felipestanzani/JToon)
-- **.NET**: [toon.NET](https://github.com/ghost1face/toon.NET)
+- **.NET:** [toon.NET](https://github.com/ghost1face/toon.NET)
-- **Swift**: [TOONEncoder](https://github.com/mattt/TOONEncoder)
+- **Swift:** [TOONEncoder](https://github.com/mattt/TOONEncoder)
-- **Go** [gotoon](https://github.com/alpkeskin/gotoon)
+- **Go:** [gotoon](https://github.com/alpkeskin/gotoon)
 
 ## License
 
@@ -159,7 +159,7 @@ Four datasets designed to test different structural patterns:
 
 #### Evaluation Process
 
-1. **Format conversion:** Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
+1. **Format conversion**: Each dataset is converted to all 5 formats (TOON, CSV, XML, JSON, YAML).
 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
 3. **Validate with LLM-as-judge**: `gpt-5-nano` validates if the answer is semantically correct (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`).
 
@@ -248,7 +248,7 @@ ${totalQuestions} questions are generated dynamically across three categories:
 
 #### Evaluation Process
 
-1. **Format conversion:** Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => f.format.toUpperCase()).join(', ')}).
+1. **Format conversion**: Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => f.format.toUpperCase()).join(', ')}).
 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer.
 3. **Validate with LLM-as-judge**: \`gpt-5-nano\` validates if the answer is semantically correct (e.g., \`50000\` = \`$50,000\`, \`Engineering\` = \`engineering\`, \`2025-01-01\` = \`January 1, 2025\`).
 
@@ -11,6 +11,7 @@ export const COMMA = ','
 export const COLON = ':'
 export const SPACE = ' '
 export const PIPE = '|'
+export const HASH = '#'
 
 // #endregion
 
419 src/decoders.ts (new file)
@@ -0,0 +1,419 @@
import type { LineCursor } from './scanner'
import type {
  ArrayHeaderInfo,
  Depth,
  JsonArray,
  JsonObject,
  JsonPrimitive,
  JsonValue,
  ParsedLine,
  ResolvedDecodeOptions,
} from './types'
import {
  COLON,
  DEFAULT_DELIMITER,
  LIST_ITEM_PREFIX,
} from './constants'
import {
  isArrayHeaderAfterHyphen,
  isObjectFirstFieldAfterHyphen,
  parseArrayHeaderLine,
  parseKeyToken,
  parsePrimitiveToken,
  parseRowValuesToPrimitives,
  splitDelimitedValues,
} from './parser'

// #region Entry decoding

export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecodeOptions): JsonValue {
  const first = cursor.peek()
  if (!first) {
    throw new Error('No content to decode')
  }

  // Check for root array
  if (isRootArrayHeaderLine(first)) {
    const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER)
    if (headerInfo) {
      cursor.advance() // Move past the header line
      return decodeArrayFromHeader(headerInfo.header, first, cursor, 0, options)
    }
  }

  // Check for single primitive value
  if (cursor.length === 1 && !isKeyValueLine(first)) {
    return parsePrimitiveToken(first.content.trim())
  }

  // Default to object
  return decodeObject(cursor, 0, options)
}

function isRootArrayHeaderLine(line: ParsedLine): boolean {
  const content = line.content.trim()
  // Root array: starts with [ and has a colon
  return content.startsWith('[') && content.includes(COLON)
}

function isKeyValueLine(line: ParsedLine): boolean {
  const content = line.content
  // Look for unquoted colon or quoted key followed by colon
  if (content.startsWith('"')) {
    // Quoted key
    let i = 1
    while (i < content.length) {
      if (content[i] === '\\' && i + 1 < content.length) {
        i += 2
        continue
      }
      if (content[i] === '"') {
        // Found end of quoted key, check for colon
        return content[i + 1] === COLON
      }
      i++
    }
    return false
  }
  else {
    // Unquoted key - look for first colon not inside quotes
    return content.includes(COLON)
  }
}

// #endregion

// #region Object decoding

function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  const obj: JsonObject = {}

  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth < baseDepth) {
      break
    }

    if (line.depth === baseDepth) {
      const [key, value] = decodeKeyValuePair(line, cursor, baseDepth, options)
      obj[key] = value
    }
    else {
      break
    }
  }

  return obj
}

function decodeKeyValuePair(
  line: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): [key: string, value: JsonValue] {
  cursor.advance()

  // Check for array header first (before parsing key)
  const arrayHeader = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER)
  if (arrayHeader && arrayHeader.header.key) {
    const value = decodeArrayFromHeader(arrayHeader.header, line, cursor, baseDepth, options)
    return [arrayHeader.header.key, value]
  }

  // Regular key-value pair
  const { key, end } = parseKeyToken(line.content, 0)
  const rest = line.content.slice(end).trim()

  // No value after colon - expect nested object or empty
  if (!rest) {
    const nextLine = cursor.peek()
    if (nextLine && nextLine.depth > baseDepth) {
      const nested = expectNestedObject(cursor, baseDepth + 1, options)
      return [key, nested]
    }
    // Empty object
    return [key, {}]
  }

  // Inline primitive value
  const value = parsePrimitiveToken(rest)
  return [key, value]
}

function expectNestedObject(cursor: LineCursor, nestedDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  return decodeObject(cursor, nestedDepth, options)
}

// #endregion

// #region Array decoding

function decodeArrayFromHeader(
  header: ArrayHeaderInfo,
  line: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonArray {
  const arrayHeader = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER)
  if (!arrayHeader) {
    throw new Error('Invalid array header')
  }

  // Inline primitive array
  if (arrayHeader.inlineValues) {
    // For inline arrays, cursor should already be advanced or will be by caller
    return decodeInlinePrimitiveArray(header, arrayHeader.inlineValues, options)
  }

  // For multi-line arrays (tabular or list), the cursor should already be positioned
  // at the array header line, but we haven't advanced past it yet

  // Tabular array
  if (header.fields && header.fields.length > 0) {
    return decodeTabularArray(header, cursor, baseDepth, options)
  }

  // List array
  return decodeListArray(header, cursor, baseDepth, options)
}

function decodeInlinePrimitiveArray(
  header: ArrayHeaderInfo,
  inlineValues: string,
  options: ResolvedDecodeOptions,
): JsonPrimitive[] {
  if (!inlineValues.trim()) {
    assertExpectedCount(0, header.length, 'inline array items', options)
    return []
  }

  const values = splitDelimitedValues(inlineValues, header.delimiter)
  const primitives = parseRowValuesToPrimitives(values)

  assertExpectedCount(primitives.length, header.length, 'inline array items', options)

  return primitives
}

function decodeListArray(
  header: ArrayHeaderInfo,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonValue[] {
  const items: JsonValue[] = []
  const itemDepth = baseDepth + 1

  while (!cursor.atEnd() && items.length < header.length) {
    const line = cursor.peek()
    if (!line || line.depth < itemDepth) {
      break
    }

    if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) {
      const item = decodeListItem(cursor, itemDepth, header.delimiter, options)
      items.push(item)
    }
    else {
      break
    }
  }

  assertExpectedCount(items.length, header.length, 'list array items', options)

  // In strict mode, check for extra items
  if (options.strict && !cursor.atEnd()) {
    const nextLine = cursor.peek()
    if (nextLine && nextLine.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
      throw new Error(`Expected ${header.length} list array items, but found more`)
    }
  }

  return items
}

function decodeTabularArray(
  header: ArrayHeaderInfo,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonObject[] {
  const objects: JsonObject[] = []
  const rowDepth = baseDepth + 1

  while (!cursor.atEnd() && objects.length < header.length) {
    const line = cursor.peek()
    if (!line || line.depth < rowDepth) {
      break
    }

    if (line.depth === rowDepth) {
      cursor.advance()
      const values = splitDelimitedValues(line.content, header.delimiter)
      assertExpectedCount(values.length, header.fields!.length, 'tabular row values', options)

      const primitives = parseRowValuesToPrimitives(values)
      const obj: JsonObject = {}

      for (let i = 0; i < header.fields!.length; i++) {
        obj[header.fields![i]!] = primitives[i]!
      }

      objects.push(obj)
    }
    else {
      break
    }
  }

  assertExpectedCount(objects.length, header.length, 'tabular rows', options)

  // In strict mode, check for extra rows
  if (options.strict && !cursor.atEnd()) {
    const nextLine = cursor.peek()
    if (nextLine && nextLine.depth === rowDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
      // A key-value pair has a colon (and if it has delimiter, colon comes first)
      // A data row either has no colon, or has delimiter before colon
      const hasColon = nextLine.content.includes(COLON)
      const hasDelimiter = nextLine.content.includes(header.delimiter)

      if (!hasColon) {
        // No colon = data row (for single-field tables)
        throw new Error(`Expected ${header.length} tabular rows, but found more`)
      }
      else if (hasDelimiter) {
        // Has both colon and delimiter - check which comes first
        const colonPos = nextLine.content.indexOf(COLON)
        const delimiterPos = nextLine.content.indexOf(header.delimiter)
        if (delimiterPos < colonPos) {
          // Delimiter before colon = data row
          throw new Error(`Expected ${header.length} tabular rows, but found more`)
        }
        // Colon before delimiter = key-value pair, OK
      }
      // Has colon but no delimiter = key-value pair, OK
    }
  }

  return objects
}

// #endregion

// #region List item decoding

function decodeListItem(
  cursor: LineCursor,
  baseDepth: Depth,
  activeDelimiter: string,
  options: ResolvedDecodeOptions,
): JsonValue {
  const line = cursor.next()
  if (!line) {
    throw new Error('Expected list item')
  }

  const afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)

  // Check for array header after hyphen
  if (isArrayHeaderAfterHyphen(afterHyphen)) {
    const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter as any)
    if (arrayHeader) {
      return decodeArrayFromHeader(arrayHeader.header, line, cursor, baseDepth, options)
    }
  }

  // Check for object first field after hyphen
  if (isObjectFirstFieldAfterHyphen(afterHyphen)) {
    return decodeObjectFromListItem(line, cursor, baseDepth, options)
  }

  // Primitive value
  return parsePrimitiveToken(afterHyphen)
}

function decodeObjectFromListItem(
  firstLine: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonObject {
  const afterHyphen = firstLine.content.slice(LIST_ITEM_PREFIX.length)
  const { key, value, followDepth } = decodeFirstFieldOnHyphen(afterHyphen, cursor, baseDepth, options)

  const obj: JsonObject = { [key]: value }

  // Read subsequent fields
  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth < followDepth) {
      break
    }

    if (line.depth === followDepth && !line.content.startsWith(LIST_ITEM_PREFIX)) {
      const [k, v] = decodeKeyValuePair(line, cursor, followDepth, options)
      obj[k] = v
    }
    else {
      break
    }
  }

  return obj
}

function decodeFirstFieldOnHyphen(
  rest: string,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): { key: string, value: JsonValue, followDepth: Depth } {
  // Check for array header as first field
  const arrayHeader = parseArrayHeaderLine(rest, DEFAULT_DELIMITER)
  if (arrayHeader) {
    // Create a synthetic line for array decoding
    const syntheticLine: ParsedLine = {
      raw: rest,
      content: rest,
      indent: baseDepth * options.indent,
      depth: baseDepth,
    }

    const value = decodeArrayFromHeader(arrayHeader.header, syntheticLine, cursor, baseDepth, options)

    // After an array, subsequent fields are at baseDepth + 1 (where array content is)
    return {
      key: arrayHeader.header.key!,
      value,
      followDepth: baseDepth + 1,
    }
  }

  // Regular key-value pair
  const { key, end } = parseKeyToken(rest, 0)
  const afterKey = rest.slice(end).trim()

  if (!afterKey) {
    // Nested object
    const nested = expectNestedObject(cursor, baseDepth + 1, options)
    return { key, value: nested, followDepth: baseDepth + 1 }
  }

  // Inline primitive
  const value = parsePrimitiveToken(afterKey)
  return { key, value, followDepth: baseDepth + 1 }
}

// #endregion

// #region Validation

function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void {
  if (options.strict && actual !== expected) {
    throw new Error(`Expected ${expected} ${what}, but got ${actual}`)
  }
}

// #endregion
26 src/index.ts
@@ -1,13 +1,19 @@
 import type {
+  DecodeOptions,
   EncodeOptions,
+  JsonValue,
+  ResolvedDecodeOptions,
   ResolvedEncodeOptions,
 } from './types'
 import { DEFAULT_DELIMITER } from './constants'
+import { decodeValueFromLines } from './decoders'
 import { encodeValue } from './encoders'
 import { normalizeValue } from './normalize'
+import { LineCursor, toParsedLines } from './scanner'
 
 export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
 export type {
+  DecodeOptions,
   Delimiter,
   DelimiterKey,
   EncodeOptions,
@@ -15,6 +21,7 @@ export type {
   JsonObject,
   JsonPrimitive,
   JsonValue,
+  ResolvedDecodeOptions,
   ResolvedEncodeOptions,
 } from './types'
 
@@ -24,6 +31,18 @@ export function encode(input: unknown, options?: EncodeOptions): string {
   return encodeValue(normalizedValue, resolvedOptions)
 }
 
+export function decode(input: string, options?: DecodeOptions): JsonValue {
+  const resolved = resolveDecodeOptions(options)
+  const lines = toParsedLines(input, resolved.indent)
+
+  if (lines.length === 0) {
+    throw new Error('Cannot decode empty input')
+  }
+
+  const cursor = new LineCursor(lines)
+  return decodeValueFromLines(cursor, resolved)
+}
+
 function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
   return {
     indent: options?.indent ?? 2,
@@ -31,3 +50,10 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
     lengthMarker: options?.lengthMarker ?? false,
   }
 }
+
+function resolveDecodeOptions(options?: DecodeOptions): ResolvedDecodeOptions {
+  return {
+    indent: options?.indent ?? 2,
+    strict: options?.strict ?? true,
+  }
+}
393 src/parser.ts (new file)
@@ -0,0 +1,393 @@
import type {
  ArrayHeaderInfo,
  Delimiter,
  JsonPrimitive,
} from './types'
import {
  BACKSLASH,
  CARRIAGE_RETURN,
  CLOSE_BRACE,
  CLOSE_BRACKET,
  COLON,
  DELIMITERS,
  DOUBLE_QUOTE,
  FALSE_LITERAL,
  HASH,
  NEWLINE,
  NULL_LITERAL,
  OPEN_BRACE,
  OPEN_BRACKET,
  PIPE,
  TAB,
  TRUE_LITERAL,
} from './constants'

// #region Array header parsing

export function parseArrayHeaderLine(
  content: string,
  defaultDelimiter: Delimiter,
): { header: ArrayHeaderInfo, inlineValues?: string } | undefined {
  // Don't match if the line starts with a quote (it's a quoted key, not an array)
  if (content.trimStart().startsWith(DOUBLE_QUOTE)) {
    return undefined
  }

  // Find the bracket segment first
  const bracketStart = content.indexOf(OPEN_BRACKET)
  if (bracketStart === -1) {
    return undefined
  }

  const bracketEnd = content.indexOf(CLOSE_BRACKET, bracketStart)
  if (bracketEnd === -1) {
    return undefined
  }

  // Find the colon that comes after all brackets and braces
  let colonIndex = bracketEnd + 1
  let braceEnd = colonIndex

  // Check for fields segment (braces come after bracket)
  const braceStart = content.indexOf(OPEN_BRACE, bracketEnd)
  if (braceStart !== -1 && braceStart < content.indexOf(COLON, bracketEnd)) {
    const foundBraceEnd = content.indexOf(CLOSE_BRACE, braceStart)
    if (foundBraceEnd !== -1) {
      braceEnd = foundBraceEnd + 1
    }
  }

  // Now find colon after brackets and braces
  colonIndex = content.indexOf(COLON, Math.max(bracketEnd, braceEnd))
  if (colonIndex === -1) {
    return undefined
  }

  const key = bracketStart > 0 ? content.slice(0, bracketStart) : undefined
  const afterColon = content.slice(colonIndex + 1).trim()

  const bracketContent = content.slice(bracketStart + 1, bracketEnd)

  // Try to parse bracket segment; return undefined if it fails
  let parsedBracket
  try {
    parsedBracket = parseBracketSegment(bracketContent, defaultDelimiter)
  }
  catch {
    return undefined
  }

  const { length, delimiter, hasLengthMarker } = parsedBracket

  // Check for fields segment
  let fields: string[] | undefined
  if (braceStart !== -1 && braceStart < colonIndex) {
    const foundBraceEnd = content.indexOf(CLOSE_BRACE, braceStart)
    if (foundBraceEnd !== -1 && foundBraceEnd < colonIndex) {
      const fieldsContent = content.slice(braceStart + 1, foundBraceEnd)
      fields = parseFieldsSegment(fieldsContent, delimiter)
    }
  }

  return {
    header: {
      key,
      length,
      delimiter,
      fields,
      hasLengthMarker,
    },
    inlineValues: afterColon || undefined,
  }
}

export function parseBracketSegment(
  seg: string,
  defaultDelimiter: Delimiter,
): { length: number, delimiter: Delimiter, hasLengthMarker: boolean } {
  let hasLengthMarker = false
  let content = seg

  // Check for length marker
  if (content.startsWith(HASH)) {
    hasLengthMarker = true
    content = content.slice(1)
  }

  // Check for delimiter suffix
  let delimiter = defaultDelimiter
  if (content.endsWith(TAB)) {
    delimiter = DELIMITERS.tab
    content = content.slice(0, -1)
  }
  else if (content.endsWith(PIPE)) {
    delimiter = DELIMITERS.pipe
    content = content.slice(0, -1)
  }

  const length = Number.parseInt(content, 10)
  if (Number.isNaN(length)) {
    throw new TypeError(`Invalid array length: ${seg}`)
  }

  return { length, delimiter, hasLengthMarker }
}

export function parseFieldsSegment(seg: string, delimiter: Delimiter): string[] {
  return splitDelimitedValues(seg, delimiter).map(field => parseStringLiteral(field.trim()))
}

// #endregion

// #region Delimited value parsing

export function splitDelimitedValues(input: string, delimiter: Delimiter): string[] {
  const values: string[] = []
  let current = ''
  let inQuotes = false
  let i = 0

  while (i < input.length) {
    const char = input[i]

    if (char === BACKSLASH && i + 1 < input.length && inQuotes) {
      // Escape sequence in quoted string
      current += char + input[i + 1]
      i += 2
      continue
    }

    if (char === DOUBLE_QUOTE) {
      inQuotes = !inQuotes
      current += char
      i++
      continue
    }

    if (char === delimiter && !inQuotes) {
      values.push(current.trim())
      current = ''
      i++
      continue
    }

    current += char
    i++
  }

  // Add last value
  if (current || values.length > 0) {
    values.push(current.trim())
  }

  return values
}

export function parseRowValuesToPrimitives(values: string[]): JsonPrimitive[] {
  return values.map(v => parsePrimitiveToken(v))
}

// #endregion

// #region Primitive and key parsing

export function parsePrimitiveToken(token: string): JsonPrimitive {
  const trimmed = token.trim()

  // Empty token
  if (!trimmed) {
    return ''
  }

  // Quoted string (if starts with quote, it MUST be properly quoted)
  if (trimmed.startsWith(DOUBLE_QUOTE)) {
    return parseStringLiteral(trimmed)
  }

  // Boolean or null literals
  if (isBooleanOrNullLiteral(trimmed)) {
    if (trimmed === TRUE_LITERAL)
      return true
    if (trimmed === FALSE_LITERAL)
      return false
    if (trimmed === NULL_LITERAL)
      return null
  }

  // Numeric literal
  if (isNumericLiteral(trimmed)) {
    return Number.parseFloat(trimmed)
  }

  // Unquoted string
  return trimmed
}

export function isBooleanOrNullLiteral(token: string): boolean {
  return token === TRUE_LITERAL || token === FALSE_LITERAL || token === NULL_LITERAL
}

export function isNumericLiteral(token: string): boolean {
  if (!token)
    return false

  // Must not have leading zeros (except for "0" itself or decimals like "0.5")
  if (token.length > 1 && token[0] === '0' && token[1] !== '.') {
    return false
  }

  // Check if it's a valid number
  const num = Number(token)
  return !Number.isNaN(num) && Number.isFinite(num)
}

export function parseStringLiteral(token: string): string {
  const trimmed = token.trim()

  if (trimmed.startsWith(DOUBLE_QUOTE)) {
    // Find the closing quote, accounting for escaped quotes
    let i = 1
    while (i < trimmed.length) {
      if (trimmed[i] === BACKSLASH && i + 1 < trimmed.length) {
        // Skip escaped character
        i += 2
        continue
      }
      if (trimmed[i] === DOUBLE_QUOTE) {
        // Found closing quote
        if (i !== trimmed.length - 1) {
          throw new Error('Unexpected characters after closing quote')
        }
        const content = trimmed.slice(1, i)
        return unescapeString(content)
      }
      i++
    }

    // If we get here, no closing quote was found
    throw new Error('Unterminated string: missing closing quote')
  }

  return trimmed
}

export function unescapeString(value: string): string {
  let result = ''
  let i = 0

  while (i < value.length) {
    if (value[i] === BACKSLASH) {
      if (i + 1 >= value.length) {
        throw new Error('Invalid escape sequence: backslash at end of string')
      }

      const next = value[i + 1]
      if (next === 'n') {
        result += NEWLINE
        i += 2
        continue
      }
      if (next === 't') {
        result += TAB
        i += 2
        continue
      }
      if (next === 'r') {
        result += CARRIAGE_RETURN
        i += 2
        continue
      }
      if (next === BACKSLASH) {
        result += BACKSLASH
        i += 2
        continue
      }
      if (next === DOUBLE_QUOTE) {
        result += DOUBLE_QUOTE
        i += 2
        continue
      }

      throw new Error(`Invalid escape sequence: \\${next}`)
    }

    result += value[i]
    i++
  }

  return result
}

export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
  let end = start
  while (end < content.length && content[end] !== COLON) {
    end++
  }

  // Validate that a colon was found
  if (end >= content.length || content[end] !== COLON) {
    throw new Error('Missing colon after key')
  }

  const key = content.slice(start, end).trim()

  // Skip the colon
  end++

  return { key, end }
}

export function parseQuotedKey(content: string, start: number): { key: string, end: number } {
  let i = start + 1 // Skip opening quote
  let keyContent = ''

  while (i < content.length) {
    if (content[i] === BACKSLASH && i + 1 < content.length) {
      keyContent += content[i]! + content[i + 1]
      i += 2
      continue
    }

    if (content[i] === DOUBLE_QUOTE) {
      // Found closing quote
      const key = unescapeString(keyContent)
      let end = i + 1

      // Validate and skip colon after quoted key
      if (end >= content.length || content[end] !== COLON) {
        throw new Error('Missing colon after key')
      }
      end++

      return { key, end }
    }

    keyContent += content[i]
    i++
  }

  throw new Error('Unterminated quoted key')
}

export function parseKeyToken(content: string, start: number): { key: string, end: number } {
  if (content[start] === DOUBLE_QUOTE) {
    return parseQuotedKey(content, start)
  }
  else {
    return parseUnquotedKey(content, start)
  }
}

// #endregion

// #region Array content detection helpers

export function isArrayHeaderAfterHyphen(content: string): boolean {
  return content.trim().startsWith(OPEN_BRACKET) && content.includes(COLON)
}

export function isObjectFirstFieldAfterHyphen(content: string): boolean {
  return content.includes(COLON)
}

// #endregion
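To make the array-header grammar concrete, here is roughly what `parseBracketSegment` above yields for the `[...]` segment of a header. This is a sketch based on the code in this diff; it assumes `DELIMITERS.pipe` is `'|'` and `DELIMITERS.tab` is `'\t'` in `constants.ts`, which the diff does not show in full:

```ts
import { parseBracketSegment } from './parser'

parseBracketSegment('3', ',')    // { length: 3, delimiter: ',', hasLengthMarker: false }
parseBracketSegment('#3', ',')   // leading '#' sets hasLengthMarker: true
parseBracketSegment('3|', ',')   // trailing '|' switches the row delimiter to pipe
parseBracketSegment('3\t', ',')  // trailing tab switches the row delimiter to tab
parseBracketSegment('abc', ',')  // throws TypeError: Invalid array length: abc
```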
63 src/scanner.ts (new file)
@@ -0,0 +1,63 @@
import type { Depth, ParsedLine } from './types'
import { SPACE } from './constants'

export class LineCursor {
  private lines: ParsedLine[]
  private index: number

  constructor(lines: ParsedLine[]) {
    this.lines = lines
    this.index = 0
  }

  peek(): ParsedLine | undefined {
    return this.lines[this.index]
  }

  next(): ParsedLine | undefined {
    return this.lines[this.index++]
  }

  current(): ParsedLine | undefined {
    return this.index > 0 ? this.lines[this.index - 1] : undefined
  }

  advance(): void {
    this.index++
  }

  atEnd(): boolean {
    return this.index >= this.lines.length
  }

  get length(): number {
    return this.lines.length
  }
}

export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
  if (!source.trim()) {
    return []
  }

  const lines = source.split('\n')
  const parsed: ParsedLine[] = []

  for (const raw of lines) {
    let indent = 0
    while (indent < raw.length && raw[indent] === SPACE) {
      indent++
    }

    const content = raw.slice(indent)
    const depth = computeDepthFromIndent(indent, indentSize)

    parsed.push({ raw, indent, content, depth })
  }

  return parsed
}

function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
  return Math.floor(indentSpaces / indentSize)
}
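As a usage sketch of the scanner above (not part of the diff itself), `toParsedLines` turns raw TOON text into `ParsedLine` records whose `depth` is the floor of the leading-space count divided by the configured indent size, and `LineCursor` is a simple forward-only iterator over those records:

```ts
import { LineCursor, toParsedLines } from './scanner'

const lines = toParsedLines('user:\n  id: 1', 2)
// [
//   { raw: 'user:', indent: 0, content: 'user:', depth: 0 },
//   { raw: '  id: 1', indent: 2, content: 'id: 1', depth: 1 },
// ]

const cursor = new LineCursor(lines)
cursor.peek()  // first line, without consuming it
cursor.next()  // consumes and returns the first line
cursor.atEnd() // false until both lines are consumed
```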
38 src/types.ts
@@ -36,4 +36,42 @@ export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>>
 
 // #endregion
+
+// #region Decoder options
+
+export interface DecodeOptions {
+  /**
+   * Number of spaces per indentation level.
+   * @default 2
+   */
+  indent?: number
+  /**
+   * When true, enforce strict validation of array lengths and tabular row counts.
+   * @default true
+   */
+  strict?: boolean
+}
+
+export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>
+
+// #endregion
+
+// #region Decoder parsing types
+
+export interface ArrayHeaderInfo {
+  key?: string
+  length: number
+  delimiter: Delimiter
+  fields?: string[]
+  hasLengthMarker: boolean
+}
+
+export interface ParsedLine {
+  raw: string
+  depth: Depth
+  indent: number
+  content: string
+}
+
+// #endregion
 
 export type Depth = number
494 test/decode.test.ts (new file)
@@ -0,0 +1,494 @@
|
|||||||
|
import { describe, expect, it } from 'vitest'
|
||||||
|
import { decode } from '../src/index'
|
||||||
|
|
||||||
|
describe('primitives', () => {
|
||||||
|
it('decodes safe unquoted strings', () => {
|
||||||
|
expect(decode('hello')).toBe('hello')
|
||||||
|
expect(decode('Ada_99')).toBe('Ada_99')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('decodes quoted strings and unescapes control characters', () => {
|
||||||
|
expect(decode('""')).toBe('')
|
||||||
|
expect(decode('"line1\\nline2"')).toBe('line1\nline2')
|
||||||
|
expect(decode('"tab\\there"')).toBe('tab\there')
|
||||||
|
expect(decode('"return\\rcarriage"')).toBe('return\rcarriage')
|
||||||
|
expect(decode('"C:\\\\Users\\\\path"')).toBe('C:\\Users\\path')
|
||||||
|
expect(decode('"say \\"hello\\""')).toBe('say "hello"')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('decodes unicode and emoji', () => {
|
||||||
|
expect(decode('café')).toBe('café')
|
||||||
|
expect(decode('你好')).toBe('你好')
|
||||||
|
expect(decode('🚀')).toBe('🚀')
|
||||||
|
expect(decode('hello 👋 world')).toBe('hello 👋 world')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('decodes numbers, booleans and null', () => {
|
||||||
|
expect(decode('42')).toBe(42)
|
||||||
|
expect(decode('3.14')).toBe(3.14)
|
||||||
|
expect(decode('-7')).toBe(-7)
|
||||||
|
expect(decode('true')).toBe(true)
|
||||||
|
expect(decode('false')).toBe(false)
|
||||||
|
expect(decode('null')).toBe(null)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('respects ambiguity quoting (quoted primitives remain strings)', () => {
|
||||||
|
expect(decode('"true"')).toBe('true')
|
||||||
|
expect(decode('"false"')).toBe('false')
|
||||||
|
expect(decode('"null"')).toBe('null')
|
||||||
|
expect(decode('"42"')).toBe('42')
|
||||||
|
expect(decode('"-3.14"')).toBe('-3.14')
|
||||||
|
expect(decode('"1e-6"')).toBe('1e-6')
|
||||||
|
expect(decode('"05"')).toBe('05')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('objects (simple)', () => {
|
||||||
|
it('parses objects with primitive values', () => {
|
||||||
|
const toon = 'id: 123\nname: Ada\nactive: true'
|
||||||
|
expect(decode(toon)).toEqual({ id: 123, name: 'Ada', active: true })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses null values in objects', () => {
|
||||||
|
const toon = 'id: 123\nvalue: null'
|
||||||
|
expect(decode(toon)).toEqual({ id: 123, value: null })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses empty nested object header', () => {
|
||||||
|
expect(decode('user:')).toEqual({ user: {} })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses quoted object values with special characters and escapes', () => {
|
||||||
|
expect(decode('note: "a:b"')).toEqual({ note: 'a:b' })
|
||||||
|
expect(decode('note: "a,b"')).toEqual({ note: 'a,b' })
|
||||||
|
expect(decode('text: "line1\\nline2"')).toEqual({ text: 'line1\nline2' })
|
||||||
|
expect(decode('text: "say \\"hello\\""')).toEqual({ text: 'say "hello"' })
|
||||||
|
expect(decode('text: " padded "')).toEqual({ text: ' padded ' })
|
||||||
|
expect(decode('text: " "')).toEqual({ text: ' ' })
|
||||||
|
expect(decode('v: "true"')).toEqual({ v: 'true' })
|
||||||
|
expect(decode('v: "42"')).toEqual({ v: '42' })
|
||||||
|
expect(decode('v: "-7.5"')).toEqual({ v: '-7.5' })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('objects (keys)', () => {
|
||||||
|
it('parses quoted keys with special characters and escapes', () => {
|
||||||
|
expect(decode('"order:id": 7')).toEqual({ 'order:id': 7 })
|
||||||
|
expect(decode('"[index]": 5')).toEqual({ '[index]': 5 })
|
||||||
|
expect(decode('"{key}": 5')).toEqual({ '{key}': 5 })
|
||||||
|
expect(decode('"a,b": 1')).toEqual({ 'a,b': 1 })
|
||||||
|
expect(decode('"full name": Ada')).toEqual({ 'full name': 'Ada' })
|
||||||
|
expect(decode('"-lead": 1')).toEqual({ '-lead': 1 })
|
||||||
|
expect(decode('" a ": 1')).toEqual({ ' a ': 1 })
|
||||||
|
expect(decode('"123": x')).toEqual({ 123: 'x' })
|
||||||
|
expect(decode('"": 1')).toEqual({ '': 1 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses dotted keys as identifiers', () => {
|
||||||
|
expect(decode('user.name: Ada')).toEqual({ 'user.name': 'Ada' })
|
||||||
|
expect(decode('_private: 1')).toEqual({ _private: 1 })
|
||||||
|
expect(decode('user_name: 1')).toEqual({ user_name: 1 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('unescapes control characters and quotes in keys', () => {
|
||||||
|
expect(decode('"line\\nbreak": 1')).toEqual({ 'line\nbreak': 1 })
|
||||||
|
expect(decode('"tab\\there": 2')).toEqual({ 'tab\there': 2 })
|
||||||
|
expect(decode('"he said \\"hi\\"": 1')).toEqual({ 'he said "hi"': 1 })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('nested objects', () => {
|
||||||
|
it('parses deeply nested objects with indentation', () => {
|
||||||
|
const toon = 'a:\n b:\n c: deep'
|
||||||
|
expect(decode(toon)).toEqual({ a: { b: { c: 'deep' } } })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('arrays of primitives', () => {
|
||||||
|
it('parses string arrays inline', () => {
|
||||||
|
const toon = 'tags[3]: reading,gaming,coding'
|
||||||
|
expect(decode(toon)).toEqual({ tags: ['reading', 'gaming', 'coding'] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses number arrays inline', () => {
|
||||||
|
const toon = 'nums[3]: 1,2,3'
|
||||||
|
expect(decode(toon)).toEqual({ nums: [1, 2, 3] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses mixed primitive arrays inline', () => {
|
||||||
|
const toon = 'data[4]: x,y,true,10'
|
||||||
|
expect(decode(toon)).toEqual({ data: ['x', 'y', true, 10] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses empty arrays', () => {
|
||||||
|
expect(decode('items[0]:')).toEqual({ items: [] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses quoted strings in arrays including empty and whitespace-only', () => {
|
||||||
|
expect(decode('items[1]: ""')).toEqual({ items: [''] })
|
||||||
|
expect(decode('items[3]: a,"",b')).toEqual({ items: ['a', '', 'b'] })
|
||||||
|
expect(decode('items[2]: " "," "')).toEqual({ items: [' ', ' '] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses strings with delimiters and structural tokens in arrays', () => {
|
||||||
|
expect(decode('items[3]: a,"b,c","d:e"')).toEqual({ items: ['a', 'b,c', 'd:e'] })
|
||||||
|
expect(decode('items[4]: x,"true","42","-3.14"')).toEqual({ items: ['x', 'true', '42', '-3.14'] })
|
||||||
|
expect(decode('items[3]: "[5]","- item","{key}"')).toEqual({ items: ['[5]', '- item', '{key}'] })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('arrays of objects (tabular and list items)', () => {
|
||||||
|
it('parses tabular arrays of uniform objects', () => {
|
||||||
|
const toon = 'items[2]{sku,qty,price}:\n A1,2,9.99\n B2,1,14.5'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
items: [
|
||||||
|
{ sku: 'A1', qty: 2, price: 9.99 },
|
||||||
|
{ sku: 'B2', qty: 1, price: 14.5 },
|
||||||
|
],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses nulls and quoted values in tabular rows', () => {
|
||||||
|
const toon = 'items[2]{id,value}:\n 1,null\n 2,"test"'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
items: [
|
||||||
|
{ id: 1, value: null },
|
||||||
|
{ id: 2, value: 'test' },
|
||||||
|
],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses quoted header keys in tabular arrays', () => {
|
||||||
|
const toon = 'items[2]{"order:id","full name"}:\n 1,Ada\n 2,Bob'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
items: [
|
||||||
|
{ 'order:id': 1, 'full name': 'Ada' },
|
||||||
|
{ 'order:id': 2, 'full name': 'Bob' },
|
||||||
|
],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses list arrays for non-uniform objects', () => {
|
||||||
|
const toon
|
||||||
|
= 'items[2]:\n'
|
||||||
|
+ ' - id: 1\n'
|
||||||
|
+ ' name: First\n'
|
||||||
|
+ ' - id: 2\n'
|
||||||
|
+ ' name: Second\n'
|
||||||
|
+ ' extra: true'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
items: [
|
||||||
|
{ id: 1, name: 'First' },
|
||||||
|
{ id: 2, name: 'Second', extra: true },
|
||||||
|
],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses objects with nested values inside list items', () => {
|
||||||
|
const toon
|
||||||
|
= 'items[1]:\n'
|
||||||
|
+ ' - id: 1\n'
|
||||||
|
+ ' nested:\n'
|
||||||
|
+ ' x: 1'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
items: [{ id: 1, nested: { x: 1 } }],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses nested tabular arrays as first field on hyphen line', () => {
|
||||||
|
const toon
|
||||||
|
= 'items[1]:\n'
|
||||||
|
+ ' - users[2]{id,name}:\n'
|
||||||
|
+ ' 1,Ada\n'
|
||||||
|
+ ' 2,Bob\n'
|
||||||
|
+ ' status: active'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
items: [
|
||||||
|
{
|
||||||
|
users: [
|
||||||
|
{ id: 1, name: 'Ada' },
|
||||||
|
{ id: 2, name: 'Bob' },
|
||||||
|
],
|
||||||
|
status: 'active',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses objects containing arrays (including empty arrays) in list format', () => {
|
||||||
|
const toon
|
||||||
|
= 'items[1]:\n'
|
||||||
|
+ ' - name: test\n'
|
||||||
|
+ ' data[0]:'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
items: [{ name: 'test', data: [] }],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses arrays of arrays within objects', () => {
|
||||||
|
const toon
|
||||||
|
= 'items[1]:\n'
|
||||||
|
+ ' - matrix[2]:\n'
|
||||||
|
+ ' - [2]: 1,2\n'
|
||||||
|
+ ' - [2]: 3,4\n'
|
||||||
|
+ ' name: grid'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
items: [{ matrix: [[1, 2], [3, 4]], name: 'grid' }],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('arrays of arrays (primitives only)', () => {
|
||||||
|
it('parses nested arrays of primitives', () => {
|
||||||
|
const toon = 'pairs[2]:\n - [2]: a,b\n - [2]: c,d'
|
||||||
|
expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses quoted strings and mixed lengths in nested arrays', () => {
|
||||||
|
const toon = 'pairs[2]:\n - [2]: a,b\n - [3]: "c,d","e:f","true"'
|
||||||
|
expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c,d', 'e:f', 'true']] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses empty inner arrays', () => {
|
||||||
|
const toon = 'pairs[2]:\n - [0]:\n - [0]:'
|
||||||
|
expect(decode(toon)).toEqual({ pairs: [[], []] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses mixed-length inner arrays', () => {
|
||||||
|
const toon = 'pairs[2]:\n - [1]: 1\n - [2]: 2,3'
|
||||||
|
expect(decode(toon)).toEqual({ pairs: [[1], [2, 3]] })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('root arrays', () => {
|
||||||
|
it('parses root arrays of primitives (inline)', () => {
|
||||||
|
const toon = '[5]: x,y,"true",true,10'
|
||||||
|
expect(decode(toon)).toEqual(['x', 'y', 'true', true, 10])
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses root arrays of uniform objects in tabular format', () => {
|
||||||
|
const toon = '[2]{id}:\n 1\n 2'
|
||||||
|
expect(decode(toon)).toEqual([{ id: 1 }, { id: 2 }])
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses root arrays of non-uniform objects in list format', () => {
|
||||||
|
const toon = '[2]:\n - id: 1\n - id: 2\n name: Ada'
|
||||||
|
expect(decode(toon)).toEqual([{ id: 1 }, { id: 2, name: 'Ada' }])
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses empty root arrays', () => {
|
||||||
|
expect(decode('[0]:')).toEqual([])
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses root arrays of arrays', () => {
|
||||||
|
const toon = '[2]:\n - [2]: 1,2\n - [0]:'
|
||||||
|
expect(decode(toon)).toEqual([[1, 2], []])
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('complex structures', () => {
|
||||||
|
it('parses mixed objects with arrays and nested objects', () => {
|
||||||
|
const toon
|
||||||
|
= 'user:\n'
|
||||||
|
+ ' id: 123\n'
|
||||||
|
+ ' name: Ada\n'
|
||||||
|
+ ' tags[2]: reading,gaming\n'
|
||||||
|
+ ' active: true\n'
|
||||||
|
+ ' prefs[0]:'
|
||||||
|
expect(decode(toon)).toEqual({
|
||||||
|
user: {
|
||||||
|
id: 123,
|
||||||
|
name: 'Ada',
|
||||||
|
tags: ['reading', 'gaming'],
|
||||||
|
active: true,
|
||||||
|
prefs: [],
|
||||||
|
},
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('mixed arrays', () => {
|
||||||
|
it('parses arrays mixing primitives, objects and strings (list format)', () => {
|
||||||
|
const toon
|
||||||
|
= 'items[3]:\n'
|
||||||
|
+ ' - 1\n'
|
||||||
|
+ ' - a: 1\n'
|
||||||
|
+ ' - text'
|
||||||
|
expect(decode(toon)).toEqual({ items: [1, { a: 1 }, 'text'] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('parses arrays mixing objects and arrays', () => {
|
||||||
|
const toon
|
||||||
|
= 'items[2]:\n'
|
||||||
|
+ ' - a: 1\n'
|
||||||
|
+ ' - [2]: 1,2'
|
||||||
|
expect(decode(toon)).toEqual({ items: [{ a: 1 }, [1, 2]] })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('delimiter options', () => {
  describe('basic delimiter usage', () => {
    it.each([
      { delimiter: '\t' as const, name: 'tab', header: '[3\t]', joined: 'reading\tgaming\tcoding' },
      { delimiter: '|' as const, name: 'pipe', header: '[3|]', joined: 'reading|gaming|coding' },
      { delimiter: ',' as const, name: 'comma', header: '[3]', joined: 'reading,gaming,coding' },
    ])('parses primitive arrays with $name delimiter', ({ header, joined }) => {
      const toon = `tags${header}: ${joined}`
      expect(decode(toon)).toEqual({ tags: ['reading', 'gaming', 'coding'] })
    })

    it.each([
      { delimiter: '\t' as const, name: 'tab', header: '[2\t]{sku\tqty\tprice}', rows: ['A1\t2\t9.99', 'B2\t1\t14.5'] },
      { delimiter: '|' as const, name: 'pipe', header: '[2|]{sku|qty|price}', rows: ['A1|2|9.99', 'B2|1|14.5'] },
    ])('parses tabular arrays with $name delimiter', ({ header, rows }) => {
      const toon = `items${header}:\n  ${rows[0]}\n  ${rows[1]}`
      expect(decode(toon)).toEqual({
        items: [
          { sku: 'A1', qty: 2, price: 9.99 },
          { sku: 'B2', qty: 1, price: 14.5 },
        ],
      })
    })

    it.each([
      { header: '[2\t]', inner: '[2\t]', a: 'a\tb', b: 'c\td' },
      { header: '[2|]', inner: '[2|]', a: 'a|b', b: 'c|d' },
    ])('parses nested arrays with custom delimiters', ({ header, inner, a, b }) => {
      const toon = `pairs${header}:\n  - ${inner}: ${a}\n  - ${inner}: ${b}`
      expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] })
    })

    it.each([
      { header: '[3\t]', joined: 'x\ty\tz' },
      { header: '[3|]', joined: 'x|y|z' },
    ])('parses root arrays of primitives with custom delimiters', ({ header, joined }) => {
      const toon = `${header}: ${joined}`
      expect(decode(toon)).toEqual(['x', 'y', 'z'])
    })

    it.each([
      { header: '[2\t]{id}', rows: ['1', '2'] },
      { header: '[2|]{id}', rows: ['1', '2'] },
    ])('parses root arrays of objects with custom delimiters', ({ header, rows }) => {
      const toon = `${header}:\n  ${rows[0]}\n  ${rows[1]}`
      expect(decode(toon)).toEqual([{ id: 1 }, { id: 2 }])
    })
  })

  describe('delimiter-aware quoting', () => {
    it.each([
      { header: '[3\t]', joined: 'a\t"b\\tc"\td', expected: ['a', 'b\tc', 'd'] },
      { header: '[3|]', joined: 'a|"b|c"|d', expected: ['a', 'b|c', 'd'] },
    ])('parses values containing the active delimiter when quoted', ({ header, joined, expected }) => {
      const toon = `items${header}: ${joined}`
      expect(decode(toon)).toEqual({ items: expected })
    })

    it.each([
      { header: '[2\t]', joined: 'a,b\tc,d' },
      { header: '[2|]', joined: 'a,b|c,d' },
    ])('does not split on commas when using non-comma delimiter', ({ header, joined }) => {
      const toon = `items${header}: ${joined}`
      expect(decode(toon)).toEqual({ items: ['a,b', 'c,d'] })
    })

    it('parses tabular values containing the active delimiter correctly', () => {
      const comma = 'items[2]{id,note}:\n  1,"a,b"\n  2,"c,d"'
      expect(decode(comma)).toEqual({ items: [{ id: 1, note: 'a,b' }, { id: 2, note: 'c,d' }] })

      const tab = 'items[2\t]{id\tnote}:\n  1\ta,b\n  2\tc,d'
      expect(decode(tab)).toEqual({ items: [{ id: 1, note: 'a,b' }, { id: 2, note: 'c,d' }] })
    })

    it('does not require quoting commas in object values when using non-comma delimiter elsewhere', () => {
      expect(decode('note: a,b')).toEqual({ note: 'a,b' })
    })

    it('parses nested array values containing the active delimiter', () => {
      expect(decode('pairs[1|]:\n  - [2|]: a|"b|c"')).toEqual({ pairs: [['a', 'b|c']] })
      expect(decode('pairs[1\t]:\n  - [2\t]: a\t"b\\tc"')).toEqual({ pairs: [['a', 'b\tc']] })
    })
  })

  describe('delimiter-independent quoting rules', () => {
    it('preserves quoted ambiguity regardless of delimiter', () => {
      expect(decode('items[3|]: "true"|"42"|"-3.14"')).toEqual({ items: ['true', '42', '-3.14'] })
      expect(decode('items[3\t]: "true"\t"42"\t"-3.14"')).toEqual({ items: ['true', '42', '-3.14'] })
    })

    it('parses structural-looking strings when quoted', () => {
      expect(decode('items[3|]: "[5]"|"{key}"|"- item"')).toEqual({ items: ['[5]', '{key}', '- item'] })
      expect(decode('items[3\t]: "[5]"\t"{key}"\t"- item"')).toEqual({ items: ['[5]', '{key}', '- item'] })
    })

    it('parses tabular headers with keys containing the active delimiter', () => {
      const toon = 'items[2|]{"a|b"}:\n  1\n  2'
      expect(decode(toon)).toEqual({ items: [{ 'a|b': 1 }, { 'a|b': 2 }] })
    })
  })
})

describe('length marker option', () => {
  it('accepts length marker on primitive arrays', () => {
    expect(decode('tags[#3]: reading,gaming,coding')).toEqual({ tags: ['reading', 'gaming', 'coding'] })
  })

  it('accepts length marker on empty arrays', () => {
    expect(decode('items[#0]:')).toEqual({ items: [] })
  })

  it('accepts length marker on tabular arrays', () => {
    const toon = 'items[#2]{sku,qty,price}:\n  A1,2,9.99\n  B2,1,14.5'
    expect(decode(toon)).toEqual({
      items: [
        { sku: 'A1', qty: 2, price: 9.99 },
        { sku: 'B2', qty: 1, price: 14.5 },
      ],
    })
  })

  it('accepts length marker on nested arrays', () => {
    const toon = 'pairs[#2]:\n  - [#2]: a,b\n  - [#2]: c,d'
    expect(decode(toon)).toEqual({ pairs: [['a', 'b'], ['c', 'd']] })
  })

  it('works with custom delimiters and length marker', () => {
    expect(decode('tags[#3|]: reading|gaming|coding')).toEqual({ tags: ['reading', 'gaming', 'coding'] })
  })
})

describe('error handling', () => {
  it('throws on array length mismatch (inline primitives)', () => {
    const toon = 'tags[2]: a,b,c'
    expect(() => decode(toon)).toThrow()
  })

  it('throws on array length mismatch (list format)', () => {
    const toon = 'items[1]:\n  - 1\n  - 2'
    expect(() => decode(toon)).toThrow()
  })

  it('throws when tabular row value count does not match header field count', () => {
    const toon = 'items[2]{id,name}:\n  1,Ada\n  2'
    expect(() => decode(toon)).toThrow()
  })

  it('throws when tabular row count does not match header length', () => {
    const toon = '[1]{id}:\n  1\n  2'
    expect(() => decode(toon)).toThrow()
  })

  it('throws on invalid escape sequences', () => {
    expect(() => decode('"a\\x"')).toThrow()
    expect(() => decode('"unterminated')).toThrow()
  })

  it('throws on missing colon in key-value context', () => {
    expect(() => decode('a:\n  user')).toThrow()
  })

  it('throws on delimiter mismatch', () => {
    const toon = 'items[2\t]{a\tb}:\n  1,2\n  3,4'
    expect(() => decode(toon)).toThrow()
  })
})