From e6c006bc67ffa15c72ed2cd2442c238c5ed77650 Mon Sep 17 00:00:00 2001 From: Johann Schopplich Date: Wed, 29 Oct 2025 13:54:55 +0100 Subject: [PATCH] feat(decoder): indentation strict-mode enforcement --- README.md | 2 +- src/decode/scanner.ts | 32 +++++++++++++-- src/index.ts | 6 +-- test/decode.test.ts | 94 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 9b20ad6..33080bd 100644 --- a/README.md +++ b/README.md @@ -642,7 +642,7 @@ Some non-JSON types are automatically normalized for LLM-safe output: |---|---| | Number (finite) | Decimal form, no scientific notation (e.g., `-0` → `0`, `1e6` → `1000000`) | | Number (`NaN`, `±Infinity`) | `null` | -| `BigInt` | Decimal digits (no quotes) | +| `BigInt` | If within safe integer range: converted to number. Otherwise: quoted decimal string (e.g., `"9007199254740993"`) | | `Date` | ISO string in quotes (e.g., `"2025-01-01T00:00:00.000Z"`) | | `undefined` | `null` | | `function` | `null` | diff --git a/src/decode/scanner.ts b/src/decode/scanner.ts index 831eb20..89cd290 100644 --- a/src/decode/scanner.ts +++ b/src/decode/scanner.ts @@ -1,5 +1,5 @@ import type { Depth, ParsedLine } from '../types' -import { SPACE } from '../constants' +import { SPACE, TAB } from '../constants' export class LineCursor { private lines: ParsedLine[] @@ -50,7 +50,7 @@ export class LineCursor { } } -export function toParsedLines(source: string, indentSize: number): ParsedLine[] { +export function toParsedLines(source: string, indentSize: number, strict: boolean): ParsedLine[] { if (!source.trim()) { return [] } @@ -58,15 +58,41 @@ export function toParsedLines(source: string, indentSize: number): ParsedLine[] const lines = source.split('\n') const parsed: ParsedLine[] = [] - for (const raw of lines) { + for (let i = 0; i < lines.length; i++) { + const raw = lines[i]! let indent = 0 while (indent < raw.length && raw[indent] === SPACE) { indent++ } const content = raw.slice(indent) + + // Skip empty lines or lines with only whitespace + if (!content.trim()) { + continue + } + const depth = computeDepthFromIndent(indent, indentSize) + // Strict mode validation + if (strict) { + // Find the full leading whitespace region (spaces and tabs) + let wsEnd = 0 + while (wsEnd < raw.length && (raw[wsEnd] === SPACE || raw[wsEnd] === TAB)) { + wsEnd++ + } + + // Check for tabs in leading whitespace (before actual content) + if (raw.slice(0, wsEnd).includes(TAB)) { + throw new SyntaxError(`Line ${i + 1}: Tabs are not allowed in indentation in strict mode`) + } + + // Check for exact multiples of indentSize + if (indent > 0 && indent % indentSize !== 0) { + throw new SyntaxError(`Line ${i + 1}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`) + } + } + parsed.push({ raw, indent, content, depth }) } diff --git a/src/index.ts b/src/index.ts index 7649c5e..765c6a5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -26,15 +26,15 @@ export function encode(input: unknown, options?: EncodeOptions): string { } export function decode(input: string, options?: DecodeOptions): JsonValue { - const resolved = resolveDecodeOptions(options) - const lines = toParsedLines(input, resolved.indent) + const resolvedOptions = resolveDecodeOptions(options) + const lines = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict) if (lines.length === 0) { throw new TypeError('Cannot decode empty input: input must be a non-empty string') } const cursor = new LineCursor(lines) - return decodeValueFromLines(cursor, resolved) + return decodeValueFromLines(cursor, resolvedOptions) } function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions { diff --git a/test/decode.test.ts b/test/decode.test.ts index 89a9722..2b38143 100644 --- a/test/decode.test.ts +++ b/test/decode.test.ts @@ -508,3 +508,97 @@ describe('error handling', () => { expect(() => decode(toon)).toThrow() }) }) + +describe('strict mode: indentation validation', () => { + describe('non-multiple indentation errors', () => { + it('throws when object field has non-multiple indentation', () => { + const toon = 'a:\n b: 1' // 3 spaces with indent=2 + expect(() => decode(toon)).toThrow(/indentation/i) + expect(() => decode(toon)).toThrow(/exact multiple/i) + }) + + it('throws when list item has non-multiple indentation', () => { + const toon = 'items[2]:\n - id: 1\n - id: 2' // 3 spaces + expect(() => decode(toon)).toThrow(/indentation/i) + }) + + it('throws with custom indent size when non-multiple', () => { + const toon = 'a:\n b: 1' // 3 spaces with indent=4 + expect(() => decode(toon, { indent: 4 })).toThrow(/exact multiple of 4/i) + }) + + it('accepts correct indentation with custom indent size', () => { + const toon = 'a:\n b: 1' // 4 spaces with indent=4 + expect(decode(toon, { indent: 4 })).toEqual({ a: { b: 1 } }) + }) + }) + + describe('tab character errors', () => { + it('throws when tab character used in indentation', () => { + const toon = 'a:\n\tb: 1' + expect(() => decode(toon)).toThrow(/tab/i) + }) + + it('throws when mixed tabs and spaces in indentation', () => { + const toon = 'a:\n \tb: 1' // space + tab + expect(() => decode(toon)).toThrow(/tab/i) + }) + + it('throws when tab at start of line', () => { + const toon = '\ta: 1' + expect(() => decode(toon)).toThrow(/tab/i) + }) + }) + + describe('tabs in quoted strings are allowed', () => { + it('accepts tabs in quoted string values', () => { + const toon = 'text: "hello\tworld"' + expect(decode(toon)).toEqual({ text: 'hello\tworld' }) + }) + + it('accepts tabs in quoted keys', () => { + const toon = '"key\ttab": value' + expect(decode(toon)).toEqual({ 'key\ttab': 'value' }) + }) + + it('accepts tabs in quoted array elements', () => { + const toon = 'items[2]: "a\tb","c\td"' + expect(decode(toon)).toEqual({ items: ['a\tb', 'c\td'] }) + }) + }) + + describe('non-strict mode', () => { + it('accepts non-multiple indentation when strict=false', () => { + const toon = 'a:\n b: 1' // 3 spaces with indent=2 + expect(decode(toon, { strict: false })).toEqual({ a: { b: 1 } }) + }) + + it('accepts tab indentation when strict=false', () => { + const toon = 'a:\n\tb: 1' + // Tabs are ignored in indentation counting, so depth=0, "b: 1" at root + expect(decode(toon, { strict: false })).toEqual({ a: {}, b: 1 }) + }) + + it('accepts deeply nested non-multiples when strict=false', () => { + const toon = 'a:\n b:\n c: 1' // 3 and 5 spaces + expect(decode(toon, { strict: false })).toEqual({ a: { b: { c: 1 } } }) + }) + }) + + describe('edge cases', () => { + it('empty lines do not trigger validation errors', () => { + const toon = 'a: 1\n\nb: 2' + expect(decode(toon)).toEqual({ a: 1, b: 2 }) + }) + + it('root-level content (0 indentation) is always valid', () => { + const toon = 'a: 1\nb: 2\nc: 3' + expect(decode(toon)).toEqual({ a: 1, b: 2, c: 3 }) + }) + + it('lines with only spaces are not validated if empty', () => { + const toon = 'a: 1\n \nb: 2' + expect(decode(toon)).toEqual({ a: 1, b: 2 }) + }) + }) +})