feat(decoder): indentation strict-mode enforcement

This commit is contained in:
Johann Schopplich
2025-10-29 13:54:55 +01:00
parent 6040c018e0
commit e6c006bc67
4 changed files with 127 additions and 7 deletions

View File

@@ -642,7 +642,7 @@ Some non-JSON types are automatically normalized for LLM-safe output:
|---|---|
| Number (finite) | Decimal form, no scientific notation (e.g., `-0` → `0`, `1e6` → `1000000`) |
| Number (`NaN`, `±Infinity`) | `null` |
| `BigInt` | Decimal digits (no quotes) |
| `BigInt` | If within safe integer range: converted to number. Otherwise: quoted decimal string (e.g., `"9007199254740993"`) |
| `Date` | ISO string in quotes (e.g., `"2025-01-01T00:00:00.000Z"`) |
| `undefined` | `null` |
| `function` | `null` |

View File

@@ -1,5 +1,5 @@
import type { Depth, ParsedLine } from '../types'
import { SPACE } from '../constants'
import { SPACE, TAB } from '../constants'
export class LineCursor {
private lines: ParsedLine[]
@@ -50,7 +50,7 @@ export class LineCursor {
}
}
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
export function toParsedLines(source: string, indentSize: number, strict: boolean): ParsedLine[] {
if (!source.trim()) {
return []
}
@@ -58,15 +58,41 @@ export function toParsedLines(source: string, indentSize: number): ParsedLine[]
const lines = source.split('\n')
const parsed: ParsedLine[] = []
for (const raw of lines) {
for (let i = 0; i < lines.length; i++) {
const raw = lines[i]!
let indent = 0
while (indent < raw.length && raw[indent] === SPACE) {
indent++
}
const content = raw.slice(indent)
// Skip empty lines or lines with only whitespace
if (!content.trim()) {
continue
}
const depth = computeDepthFromIndent(indent, indentSize)
// Strict mode validation
if (strict) {
// Find the full leading whitespace region (spaces and tabs)
let wsEnd = 0
while (wsEnd < raw.length && (raw[wsEnd] === SPACE || raw[wsEnd] === TAB)) {
wsEnd++
}
// Check for tabs in leading whitespace (before actual content)
if (raw.slice(0, wsEnd).includes(TAB)) {
throw new SyntaxError(`Line ${i + 1}: Tabs are not allowed in indentation in strict mode`)
}
// Check for exact multiples of indentSize
if (indent > 0 && indent % indentSize !== 0) {
throw new SyntaxError(`Line ${i + 1}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
}
}
parsed.push({ raw, indent, content, depth })
}

View File

@@ -26,15 +26,15 @@ export function encode(input: unknown, options?: EncodeOptions): string {
}
export function decode(input: string, options?: DecodeOptions): JsonValue {
const resolved = resolveDecodeOptions(options)
const lines = toParsedLines(input, resolved.indent)
const resolvedOptions = resolveDecodeOptions(options)
const lines = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
if (lines.length === 0) {
throw new TypeError('Cannot decode empty input: input must be a non-empty string')
}
const cursor = new LineCursor(lines)
return decodeValueFromLines(cursor, resolved)
return decodeValueFromLines(cursor, resolvedOptions)
}
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {

View File

@@ -508,3 +508,97 @@ describe('error handling', () => {
expect(() => decode(toon)).toThrow()
})
})
/**
 * Indentation validation performed while splitting input into parsed lines.
 *
 * The cases that call `decode(toon)` without options and expect a throw rely
 * on strict mode being the default — NOTE(review): confirm against
 * `resolveDecodeOptions`.
 *
 * The number of spaces inside each fixture string is significant; every
 * fixture carries a comment stating the intended width so that accidental
 * whitespace normalization is easy to spot.
 */
describe('strict mode: indentation validation', () => {
  describe('non-multiple indentation errors', () => {
    it('throws when object field has non-multiple indentation', () => {
      const toon = 'a:\n   b: 1' // 3 spaces with indent=2
      expect(() => decode(toon)).toThrow(/indentation/i)
      expect(() => decode(toon)).toThrow(/exact multiple/i)
    })

    it('throws when list item has non-multiple indentation', () => {
      const toon = 'items[2]:\n  - id: 1\n   - id: 2' // second item: 3 spaces
      expect(() => decode(toon)).toThrow(/indentation/i)
    })

    it('throws with custom indent size when non-multiple', () => {
      const toon = 'a:\n   b: 1' // 3 spaces with indent=4
      expect(() => decode(toon, { indent: 4 })).toThrow(/exact multiple of 4/i)
    })

    it('accepts correct indentation with custom indent size', () => {
      const toon = 'a:\n    b: 1' // 4 spaces with indent=4
      expect(decode(toon, { indent: 4 })).toEqual({ a: { b: 1 } })
    })
  })

  describe('tab character errors', () => {
    it('throws when tab character used in indentation', () => {
      const toon = 'a:\n\tb: 1'
      expect(() => decode(toon)).toThrow(/tab/i)
    })

    it('throws when mixed tabs and spaces in indentation', () => {
      const toon = 'a:\n \tb: 1' // space + tab
      expect(() => decode(toon)).toThrow(/tab/i)
    })

    it('throws when tab at start of line', () => {
      const toon = '\ta: 1'
      expect(() => decode(toon)).toThrow(/tab/i)
    })
  })

  // Only the leading whitespace region is checked for tabs; tabs that appear
  // after the first non-whitespace character must not trigger the error.
  describe('tabs in quoted strings are allowed', () => {
    it('accepts tabs in quoted string values', () => {
      const toon = 'text: "hello\tworld"'
      expect(decode(toon)).toEqual({ text: 'hello\tworld' })
    })

    it('accepts tabs in quoted keys', () => {
      const toon = '"key\ttab": value'
      expect(decode(toon)).toEqual({ 'key\ttab': 'value' })
    })

    it('accepts tabs in quoted array elements', () => {
      const toon = 'items[2]: "a\tb","c\td"'
      expect(decode(toon)).toEqual({ items: ['a\tb', 'c\td'] })
    })
  })

  describe('non-strict mode', () => {
    it('accepts non-multiple indentation when strict=false', () => {
      const toon = 'a:\n   b: 1' // 3 spaces with indent=2
      expect(decode(toon, { strict: false })).toEqual({ a: { b: 1 } })
    })

    it('accepts tab indentation when strict=false', () => {
      const toon = 'a:\n\tb: 1'
      // Tabs are ignored in indentation counting, so depth=0, "b: 1" at root
      expect(decode(toon, { strict: false })).toEqual({ a: {}, b: 1 })
    })

    it('accepts deeply nested non-multiples when strict=false', () => {
      const toon = 'a:\n   b:\n     c: 1' // 3 and 5 spaces
      expect(decode(toon, { strict: false })).toEqual({ a: { b: { c: 1 } } })
    })
  })

  describe('edge cases', () => {
    it('empty lines do not trigger validation errors', () => {
      const toon = 'a: 1\n\nb: 2'
      expect(decode(toon)).toEqual({ a: 1, b: 2 })
    })

    it('root-level content (0 indentation) is always valid', () => {
      const toon = 'a: 1\nb: 2\nc: 3'
      expect(decode(toon)).toEqual({ a: 1, b: 2, c: 3 })
    })

    it('lines with only spaces are not validated if empty', () => {
      // The middle line is whitespace-only (a non-multiple width); blank
      // lines are skipped before the strict checks run.
      const toon = 'a: 1\n \nb: 2'
      expect(decode(toon)).toEqual({ a: 1, b: 2 })
    })
  })
})