mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 15:24:10 +08:00
feat(decoder): indentation strict-mode enforcement
This commit is contained in:
@@ -642,7 +642,7 @@ Some non-JSON types are automatically normalized for LLM-safe output:
|
||||
|---|---|
|
||||
| Number (finite) | Decimal form, no scientific notation (e.g., `-0` → `0`, `1e6` → `1000000`) |
|
||||
| Number (`NaN`, `±Infinity`) | `null` |
|
||||
| `BigInt` | Decimal digits (no quotes) |
|
||||
| `BigInt` | Converted to a number if within the safe integer range; otherwise emitted as a quoted decimal string (e.g., `"9007199254740993"`) |
|
||||
| `Date` | ISO string in quotes (e.g., `"2025-01-01T00:00:00.000Z"`) |
|
||||
| `undefined` | `null` |
|
||||
| `function` | `null` |
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { Depth, ParsedLine } from '../types'
|
||||
import { SPACE } from '../constants'
|
||||
import { SPACE, TAB } from '../constants'
|
||||
|
||||
export class LineCursor {
|
||||
private lines: ParsedLine[]
|
||||
@@ -50,7 +50,7 @@ export class LineCursor {
|
||||
}
|
||||
}
|
||||
|
||||
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
|
||||
export function toParsedLines(source: string, indentSize: number, strict: boolean): ParsedLine[] {
|
||||
if (!source.trim()) {
|
||||
return []
|
||||
}
|
||||
@@ -58,15 +58,41 @@ export function toParsedLines(source: string, indentSize: number): ParsedLine[]
|
||||
const lines = source.split('\n')
|
||||
const parsed: ParsedLine[] = []
|
||||
|
||||
for (const raw of lines) {
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const raw = lines[i]!
|
||||
let indent = 0
|
||||
while (indent < raw.length && raw[indent] === SPACE) {
|
||||
indent++
|
||||
}
|
||||
|
||||
const content = raw.slice(indent)
|
||||
|
||||
// Skip empty lines or lines with only whitespace
|
||||
if (!content.trim()) {
|
||||
continue
|
||||
}
|
||||
|
||||
const depth = computeDepthFromIndent(indent, indentSize)
|
||||
|
||||
// Strict mode validation
|
||||
if (strict) {
|
||||
// Find the full leading whitespace region (spaces and tabs)
|
||||
let wsEnd = 0
|
||||
while (wsEnd < raw.length && (raw[wsEnd] === SPACE || raw[wsEnd] === TAB)) {
|
||||
wsEnd++
|
||||
}
|
||||
|
||||
// Check for tabs in leading whitespace (before actual content)
|
||||
if (raw.slice(0, wsEnd).includes(TAB)) {
|
||||
throw new SyntaxError(`Line ${i + 1}: Tabs are not allowed in indentation in strict mode`)
|
||||
}
|
||||
|
||||
// Check for exact multiples of indentSize
|
||||
if (indent > 0 && indent % indentSize !== 0) {
|
||||
throw new SyntaxError(`Line ${i + 1}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
|
||||
}
|
||||
}
|
||||
|
||||
parsed.push({ raw, indent, content, depth })
|
||||
}
|
||||
|
||||
|
||||
@@ -26,15 +26,15 @@ export function encode(input: unknown, options?: EncodeOptions): string {
|
||||
}
|
||||
|
||||
export function decode(input: string, options?: DecodeOptions): JsonValue {
|
||||
const resolved = resolveDecodeOptions(options)
|
||||
const lines = toParsedLines(input, resolved.indent)
|
||||
const resolvedOptions = resolveDecodeOptions(options)
|
||||
const lines = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
|
||||
|
||||
if (lines.length === 0) {
|
||||
throw new TypeError('Cannot decode empty input: input must be a non-empty string')
|
||||
}
|
||||
|
||||
const cursor = new LineCursor(lines)
|
||||
return decodeValueFromLines(cursor, resolved)
|
||||
return decodeValueFromLines(cursor, resolvedOptions)
|
||||
}
|
||||
|
||||
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
||||
|
||||
@@ -508,3 +508,97 @@ describe('error handling', () => {
|
||||
expect(() => decode(toon)).toThrow()
|
||||
})
|
||||
})
|
||||
|
||||
describe('strict mode: indentation validation', () => {
|
||||
describe('non-multiple indentation errors', () => {
|
||||
it('throws when object field has non-multiple indentation', () => {
|
||||
const toon = 'a:\n b: 1' // 3 spaces with indent=2
|
||||
expect(() => decode(toon)).toThrow(/indentation/i)
|
||||
expect(() => decode(toon)).toThrow(/exact multiple/i)
|
||||
})
|
||||
|
||||
it('throws when list item has non-multiple indentation', () => {
|
||||
const toon = 'items[2]:\n - id: 1\n - id: 2' // 3 spaces
|
||||
expect(() => decode(toon)).toThrow(/indentation/i)
|
||||
})
|
||||
|
||||
it('throws with custom indent size when non-multiple', () => {
|
||||
const toon = 'a:\n b: 1' // 3 spaces with indent=4
|
||||
expect(() => decode(toon, { indent: 4 })).toThrow(/exact multiple of 4/i)
|
||||
})
|
||||
|
||||
it('accepts correct indentation with custom indent size', () => {
|
||||
const toon = 'a:\n b: 1' // 4 spaces with indent=4
|
||||
expect(decode(toon, { indent: 4 })).toEqual({ a: { b: 1 } })
|
||||
})
|
||||
})
|
||||
|
||||
describe('tab character errors', () => {
|
||||
it('throws when tab character used in indentation', () => {
|
||||
const toon = 'a:\n\tb: 1'
|
||||
expect(() => decode(toon)).toThrow(/tab/i)
|
||||
})
|
||||
|
||||
it('throws when mixed tabs and spaces in indentation', () => {
|
||||
const toon = 'a:\n \tb: 1' // space + tab
|
||||
expect(() => decode(toon)).toThrow(/tab/i)
|
||||
})
|
||||
|
||||
it('throws when tab at start of line', () => {
|
||||
const toon = '\ta: 1'
|
||||
expect(() => decode(toon)).toThrow(/tab/i)
|
||||
})
|
||||
})
|
||||
|
||||
describe('tabs in quoted strings are allowed', () => {
|
||||
it('accepts tabs in quoted string values', () => {
|
||||
const toon = 'text: "hello\tworld"'
|
||||
expect(decode(toon)).toEqual({ text: 'hello\tworld' })
|
||||
})
|
||||
|
||||
it('accepts tabs in quoted keys', () => {
|
||||
const toon = '"key\ttab": value'
|
||||
expect(decode(toon)).toEqual({ 'key\ttab': 'value' })
|
||||
})
|
||||
|
||||
it('accepts tabs in quoted array elements', () => {
|
||||
const toon = 'items[2]: "a\tb","c\td"'
|
||||
expect(decode(toon)).toEqual({ items: ['a\tb', 'c\td'] })
|
||||
})
|
||||
})
|
||||
|
||||
describe('non-strict mode', () => {
|
||||
it('accepts non-multiple indentation when strict=false', () => {
|
||||
const toon = 'a:\n b: 1' // 3 spaces with indent=2
|
||||
expect(decode(toon, { strict: false })).toEqual({ a: { b: 1 } })
|
||||
})
|
||||
|
||||
it('accepts tab indentation when strict=false', () => {
|
||||
const toon = 'a:\n\tb: 1'
|
||||
// Only leading spaces count toward indentation, so the tab-indented line has indent 0 (depth 0) and "b: 1" is parsed as a root-level field
|
||||
expect(decode(toon, { strict: false })).toEqual({ a: {}, b: 1 })
|
||||
})
|
||||
|
||||
it('accepts deeply nested non-multiples when strict=false', () => {
|
||||
const toon = 'a:\n b:\n c: 1' // 3 and 5 spaces
|
||||
expect(decode(toon, { strict: false })).toEqual({ a: { b: { c: 1 } } })
|
||||
})
|
||||
})
|
||||
|
||||
describe('edge cases', () => {
|
||||
it('empty lines do not trigger validation errors', () => {
|
||||
const toon = 'a: 1\n\nb: 2'
|
||||
expect(decode(toon)).toEqual({ a: 1, b: 2 })
|
||||
})
|
||||
|
||||
it('root-level content (0 indentation) is always valid', () => {
|
||||
const toon = 'a: 1\nb: 2\nc: 3'
|
||||
expect(decode(toon)).toEqual({ a: 1, b: 2, c: 3 })
|
||||
})
|
||||
|
||||
it('lines with only spaces are not validated if empty', () => {
|
||||
const toon = 'a: 1\n \nb: 2'
|
||||
expect(decode(toon)).toEqual({ a: 1, b: 2 })
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user