mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 15:24:10 +08:00
feat(decoder): indentation strict-mode enforcement
This commit is contained in:
@@ -642,7 +642,7 @@ Some non-JSON types are automatically normalized for LLM-safe output:
|
|||||||
|---|---|
|
|---|---|
|
||||||
| Number (finite) | Decimal form, no scientific notation (e.g., `-0` → `0`, `1e6` → `1000000`) |
|
| Number (finite) | Decimal form, no scientific notation (e.g., `-0` → `0`, `1e6` → `1000000`) |
|
||||||
| Number (`NaN`, `±Infinity`) | `null` |
|
| Number (`NaN`, `±Infinity`) | `null` |
|
||||||
| `BigInt` | Decimal digits (no quotes) |
|
| `BigInt` | If within safe integer range: converted to number. Otherwise: quoted decimal string (e.g., `"9007199254740993"`) |
|
||||||
| `Date` | ISO string in quotes (e.g., `"2025-01-01T00:00:00.000Z"`) |
|
| `Date` | ISO string in quotes (e.g., `"2025-01-01T00:00:00.000Z"`) |
|
||||||
| `undefined` | `null` |
|
| `undefined` | `null` |
|
||||||
| `function` | `null` |
|
| `function` | `null` |
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import type { Depth, ParsedLine } from '../types'
|
import type { Depth, ParsedLine } from '../types'
|
||||||
import { SPACE } from '../constants'
|
import { SPACE, TAB } from '../constants'
|
||||||
|
|
||||||
export class LineCursor {
|
export class LineCursor {
|
||||||
private lines: ParsedLine[]
|
private lines: ParsedLine[]
|
||||||
@@ -50,7 +50,7 @@ export class LineCursor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
|
export function toParsedLines(source: string, indentSize: number, strict: boolean): ParsedLine[] {
|
||||||
if (!source.trim()) {
|
if (!source.trim()) {
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
@@ -58,15 +58,41 @@ export function toParsedLines(source: string, indentSize: number): ParsedLine[]
|
|||||||
const lines = source.split('\n')
|
const lines = source.split('\n')
|
||||||
const parsed: ParsedLine[] = []
|
const parsed: ParsedLine[] = []
|
||||||
|
|
||||||
for (const raw of lines) {
|
for (let i = 0; i < lines.length; i++) {
|
||||||
|
const raw = lines[i]!
|
||||||
let indent = 0
|
let indent = 0
|
||||||
while (indent < raw.length && raw[indent] === SPACE) {
|
while (indent < raw.length && raw[indent] === SPACE) {
|
||||||
indent++
|
indent++
|
||||||
}
|
}
|
||||||
|
|
||||||
const content = raw.slice(indent)
|
const content = raw.slice(indent)
|
||||||
|
|
||||||
|
// Skip empty lines or lines with only whitespace
|
||||||
|
if (!content.trim()) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
const depth = computeDepthFromIndent(indent, indentSize)
|
const depth = computeDepthFromIndent(indent, indentSize)
|
||||||
|
|
||||||
|
// Strict mode validation
|
||||||
|
if (strict) {
|
||||||
|
// Find the full leading whitespace region (spaces and tabs)
|
||||||
|
let wsEnd = 0
|
||||||
|
while (wsEnd < raw.length && (raw[wsEnd] === SPACE || raw[wsEnd] === TAB)) {
|
||||||
|
wsEnd++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for tabs in leading whitespace (before actual content)
|
||||||
|
if (raw.slice(0, wsEnd).includes(TAB)) {
|
||||||
|
throw new SyntaxError(`Line ${i + 1}: Tabs are not allowed in indentation in strict mode`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for exact multiples of indentSize
|
||||||
|
if (indent > 0 && indent % indentSize !== 0) {
|
||||||
|
throw new SyntaxError(`Line ${i + 1}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
parsed.push({ raw, indent, content, depth })
|
parsed.push({ raw, indent, content, depth })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,15 +26,15 @@ export function encode(input: unknown, options?: EncodeOptions): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function decode(input: string, options?: DecodeOptions): JsonValue {
|
export function decode(input: string, options?: DecodeOptions): JsonValue {
|
||||||
const resolved = resolveDecodeOptions(options)
|
const resolvedOptions = resolveDecodeOptions(options)
|
||||||
const lines = toParsedLines(input, resolved.indent)
|
const lines = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
|
||||||
|
|
||||||
if (lines.length === 0) {
|
if (lines.length === 0) {
|
||||||
throw new TypeError('Cannot decode empty input: input must be a non-empty string')
|
throw new TypeError('Cannot decode empty input: input must be a non-empty string')
|
||||||
}
|
}
|
||||||
|
|
||||||
const cursor = new LineCursor(lines)
|
const cursor = new LineCursor(lines)
|
||||||
return decodeValueFromLines(cursor, resolved)
|
return decodeValueFromLines(cursor, resolvedOptions)
|
||||||
}
|
}
|
||||||
|
|
||||||
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
||||||
|
|||||||
@@ -508,3 +508,97 @@ describe('error handling', () => {
|
|||||||
expect(() => decode(toon)).toThrow()
|
expect(() => decode(toon)).toThrow()
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('strict mode: indentation validation', () => {
|
||||||
|
describe('non-multiple indentation errors', () => {
|
||||||
|
it('throws when object field has non-multiple indentation', () => {
|
||||||
|
const toon = 'a:\n   b: 1' // 3 spaces with indent=2
|
||||||
|
expect(() => decode(toon)).toThrow(/indentation/i)
|
||||||
|
expect(() => decode(toon)).toThrow(/exact multiple/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('throws when list item has non-multiple indentation', () => {
|
||||||
|
const toon = 'items[2]:\n   - id: 1\n   - id: 2' // 3 spaces
|
||||||
|
expect(() => decode(toon)).toThrow(/indentation/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('throws with custom indent size when non-multiple', () => {
|
||||||
|
const toon = 'a:\n   b: 1' // 3 spaces with indent=4
|
||||||
|
expect(() => decode(toon, { indent: 4 })).toThrow(/exact multiple of 4/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts correct indentation with custom indent size', () => {
|
||||||
|
const toon = 'a:\n    b: 1' // 4 spaces with indent=4
|
||||||
|
expect(decode(toon, { indent: 4 })).toEqual({ a: { b: 1 } })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('tab character errors', () => {
|
||||||
|
it('throws when tab character used in indentation', () => {
|
||||||
|
const toon = 'a:\n\tb: 1'
|
||||||
|
expect(() => decode(toon)).toThrow(/tab/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('throws when mixed tabs and spaces in indentation', () => {
|
||||||
|
const toon = 'a:\n \tb: 1' // space + tab
|
||||||
|
expect(() => decode(toon)).toThrow(/tab/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('throws when tab at start of line', () => {
|
||||||
|
const toon = '\ta: 1'
|
||||||
|
expect(() => decode(toon)).toThrow(/tab/i)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('tabs in quoted strings are allowed', () => {
|
||||||
|
it('accepts tabs in quoted string values', () => {
|
||||||
|
const toon = 'text: "hello\tworld"'
|
||||||
|
expect(decode(toon)).toEqual({ text: 'hello\tworld' })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts tabs in quoted keys', () => {
|
||||||
|
const toon = '"key\ttab": value'
|
||||||
|
expect(decode(toon)).toEqual({ 'key\ttab': 'value' })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts tabs in quoted array elements', () => {
|
||||||
|
const toon = 'items[2]: "a\tb","c\td"'
|
||||||
|
expect(decode(toon)).toEqual({ items: ['a\tb', 'c\td'] })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('non-strict mode', () => {
|
||||||
|
it('accepts non-multiple indentation when strict=false', () => {
|
||||||
|
const toon = 'a:\n   b: 1' // 3 spaces with indent=2
|
||||||
|
expect(decode(toon, { strict: false })).toEqual({ a: { b: 1 } })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts tab indentation when strict=false', () => {
|
||||||
|
const toon = 'a:\n\tb: 1'
|
||||||
|
// Tabs are ignored in indentation counting, so depth=0, "b: 1" at root
|
||||||
|
expect(decode(toon, { strict: false })).toEqual({ a: {}, b: 1 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts deeply nested non-multiples when strict=false', () => {
|
||||||
|
const toon = 'a:\n   b:\n     c: 1' // 3 and 5 spaces
|
||||||
|
expect(decode(toon, { strict: false })).toEqual({ a: { b: { c: 1 } } })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('edge cases', () => {
|
||||||
|
it('empty lines do not trigger validation errors', () => {
|
||||||
|
const toon = 'a: 1\n\nb: 2'
|
||||||
|
expect(decode(toon)).toEqual({ a: 1, b: 2 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('root-level content (0 indentation) is always valid', () => {
|
||||||
|
const toon = 'a: 1\nb: 2\nc: 3'
|
||||||
|
expect(decode(toon)).toEqual({ a: 1, b: 2, c: 3 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('lines with only spaces are not validated if empty', () => {
|
||||||
|
const toon = 'a: 1\n \nb: 2'
|
||||||
|
expect(decode(toon)).toEqual({ a: 1, b: 2 })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user