From bd06e5b1ea229e66c43d701bbc3e441020e9fc2f Mon Sep 17 00:00:00 2001 From: Johann Schopplich Date: Wed, 29 Oct 2025 14:51:53 +0100 Subject: [PATCH] feat(decoder): blank line validation for strict mode in arrays and tabular rows --- src/decode/decoders.ts | 50 ++++++++++++++++++++++++- src/decode/scanner.ts | 33 ++++++++++++----- src/decode/validation.ts | 40 +++++++++++++++++++- src/index.ts | 6 +-- src/types.ts | 7 ++++ test/decode.test.ts | 80 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 202 insertions(+), 14 deletions(-) diff --git a/src/decode/decoders.ts b/src/decode/decoders.ts index 28423e0..609a9f7 100644 --- a/src/decode/decoders.ts +++ b/src/decode/decoders.ts @@ -3,7 +3,7 @@ import type { LineCursor } from './scanner' import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants' import { findClosingQuote } from '../shared/string-utils' import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser' -import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation' +import { assertExpectedCount, validateNoBlankLinesInRange, validateNoExtraListItems, validateNoExtraTabularRows } from './validation' // #region Entry decoding @@ -179,6 +179,10 @@ function decodeListArray( const items: JsonValue[] = [] const itemDepth = baseDepth + 1 + // Track line range for blank line validation + let startLine: number | undefined + let endLine: number | undefined + while (!cursor.atEnd() && items.length < header.length) { const line = cursor.peek() if (!line || line.depth < itemDepth) { @@ -186,8 +190,20 @@ function decodeListArray( } if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) { + // Track first and last item line numbers + if (startLine === undefined) { + startLine = line.lineNumber + } + endLine = line.lineNumber + const item = decodeListItem(cursor, itemDepth, header.delimiter, options) items.push(item) + + // Update endLine to the current cursor position (after item was decoded) + const currentLine = cursor.current() + if (currentLine) { + endLine = currentLine.lineNumber + } } else { break @@ -196,6 +212,17 @@ function decodeListArray( assertExpectedCount(items.length, header.length, 'list array items', options) + // In strict mode, check for blank lines inside the array + if (options.strict && startLine !== undefined && endLine !== undefined) { + validateNoBlankLinesInRange( + startLine, // From first item line + endLine, // To last item line + cursor.getBlankLines(), + options.strict, + 'list array', + ) + } + // In strict mode, check for extra items if (options.strict) { validateNoExtraListItems(cursor, itemDepth, header.length) @@ -213,6 +240,10 @@ function decodeTabularArray( const objects: JsonObject[] = [] const rowDepth = baseDepth + 1 + // Track line range for blank line validation + let startLine: number | undefined + let endLine: number | undefined + while (!cursor.atEnd() && objects.length < header.length) { const line = cursor.peek() if (!line || line.depth < rowDepth) { @@ -220,6 +251,12 @@ function decodeTabularArray( } if (line.depth === rowDepth) { + // Track first and last row line numbers + if (startLine === undefined) { + startLine = line.lineNumber + } + endLine = line.lineNumber + cursor.advance() const values = parseDelimitedValues(line.content, header.delimiter) assertExpectedCount(values.length, header.fields!.length, 'tabular row values', options) @@ -240,6 +277,17 @@ function decodeTabularArray( assertExpectedCount(objects.length, header.length, 'tabular rows', options) + // In strict mode, check for blank lines inside the array + if (options.strict && startLine !== undefined && endLine !== undefined) { + validateNoBlankLinesInRange( + startLine, // From first row line + endLine, // To last row line + cursor.getBlankLines(), + options.strict, + 'tabular array', + ) + } + // In strict mode, check for extra rows if (options.strict) { validateNoExtraTabularRows(cursor, rowDepth, header) diff --git a/src/decode/scanner.ts b/src/decode/scanner.ts index 89cd290..f8fcbe4 100644 --- a/src/decode/scanner.ts +++ b/src/decode/scanner.ts @@ -1,13 +1,24 @@ -import type { Depth, ParsedLine } from '../types' +import type { BlankLineInfo, Depth, ParsedLine } from '../types' import { SPACE, TAB } from '../constants' +export interface ScanResult { + lines: ParsedLine[] + blankLines: BlankLineInfo[] +} + export class LineCursor { private lines: ParsedLine[] private index: number + private blankLines: BlankLineInfo[] - constructor(lines: ParsedLine[]) { + constructor(lines: ParsedLine[], blankLines: BlankLineInfo[] = []) { this.lines = lines this.index = 0 + this.blankLines = blankLines + } + + getBlankLines(): BlankLineInfo[] { + return this.blankLines } peek(): ParsedLine | undefined { @@ -50,16 +61,18 @@ export class LineCursor { } } -export function toParsedLines(source: string, indentSize: number, strict: boolean): ParsedLine[] { +export function toParsedLines(source: string, indentSize: number, strict: boolean): ScanResult { if (!source.trim()) { - return [] + return { lines: [], blankLines: [] } } const lines = source.split('\n') const parsed: ParsedLine[] = [] + const blankLines: BlankLineInfo[] = [] for (let i = 0; i < lines.length; i++) { const raw = lines[i]! + const lineNumber = i + 1 let indent = 0 while (indent < raw.length && raw[indent] === SPACE) { indent++ @@ -67,8 +80,10 @@ export function toParsedLines(source: string, indentSize: number, strict: boolea const content = raw.slice(indent) - // Skip empty lines or lines with only whitespace + // Track blank lines if (!content.trim()) { + const depth = computeDepthFromIndent(indent, indentSize) + blankLines.push({ lineNumber, indent, depth }) continue } @@ -84,19 +99,19 @@ export function toParsedLines(source: string, indentSize: number, strict: boolea // Check for tabs in leading whitespace (before actual content) if (raw.slice(0, wsEnd).includes(TAB)) { - throw new SyntaxError(`Line ${i + 1}: Tabs are not allowed in indentation in strict mode`) + throw new SyntaxError(`Line ${lineNumber}: Tabs are not allowed in indentation in strict mode`) } // Check for exact multiples of indentSize if (indent > 0 && indent % indentSize !== 0) { - throw new SyntaxError(`Line ${i + 1}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`) + throw new SyntaxError(`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`) } } - parsed.push({ raw, indent, content, depth }) + parsed.push({ raw, indent, content, depth, lineNumber }) } - return parsed + return { lines: parsed, blankLines } } function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth { diff --git a/src/decode/validation.ts b/src/decode/validation.ts index cc8bad4..233ee4e 100644 --- a/src/decode/validation.ts +++ b/src/decode/validation.ts @@ -1,4 +1,4 @@ -import type { ArrayHeaderInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types' +import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types' import type { LineCursor } from './scanner' import { COLON, LIST_ITEM_PREFIX } from '../constants' @@ -71,6 +71,44 @@ export function validateNoExtraTabularRows( } } +/** + * Validates that there are no blank lines within a specific line range and depth. + * + * @remarks + * In strict mode, blank lines inside arrays/tabular rows are not allowed. + * + * @param startLine The starting line number (inclusive) + * @param endLine The ending line number (inclusive) + * @param blankLines Array of blank line information + * @param strict Whether strict mode is enabled + * @param context Description of the context (e.g., "list array", "tabular array") + * @throws SyntaxError if blank lines are found in strict mode + */ +export function validateNoBlankLinesInRange( + startLine: number, + endLine: number, + blankLines: BlankLineInfo[], + strict: boolean, + context: string, +): void { + if (!strict) + return + + // Find blank lines within the range + // Note: We don't filter by depth because ANY blank line between array items is an error, + // regardless of its indentation level + const blanksInRange = blankLines.filter( + blank => blank.lineNumber > startLine + && blank.lineNumber < endLine, + ) + + if (blanksInRange.length > 0) { + throw new SyntaxError( + `Line ${blanksInRange[0]!.lineNumber}: Blank lines inside ${context} are not allowed in strict mode`, + ) + } +} + /** * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array. * diff --git a/src/index.ts b/src/index.ts index 765c6a5..bedf973 100644 --- a/src/index.ts +++ b/src/index.ts @@ -27,13 +27,13 @@ export function encode(input: unknown, options?: EncodeOptions): string { export function decode(input: string, options?: DecodeOptions): JsonValue { const resolvedOptions = resolveDecodeOptions(options) - const lines = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict) + const scanResult = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict) - if (lines.length === 0) { + if (scanResult.lines.length === 0) { throw new TypeError('Cannot decode empty input: input must be a non-empty string') } - const cursor = new LineCursor(lines) + const cursor = new LineCursor(scanResult.lines, scanResult.blankLines) return decodeValueFromLines(cursor, resolvedOptions) } diff --git a/src/types.ts b/src/types.ts index e97f148..ef20622 100644 --- a/src/types.ts +++ b/src/types.ts @@ -70,6 +70,13 @@ export interface ParsedLine { depth: Depth indent: number content: string + lineNumber: number +} + +export interface BlankLineInfo { + lineNumber: number + indent: number + depth: Depth } // #endregion diff --git a/test/decode.test.ts b/test/decode.test.ts index 2b38143..3329497 100644 --- a/test/decode.test.ts +++ b/test/decode.test.ts @@ -602,3 +602,83 @@ describe('strict mode: indentation validation', () => { }) }) }) + +describe('blank lines in arrays', () => { + describe('strict mode: errors on blank lines inside arrays', () => { + it('throws on blank line inside list array', () => { + const teon = 'items[3]:\n - a\n\n - b\n - c' + expect(() => decode(teon)).toThrow(/blank line/i) + expect(() => decode(teon)).toThrow(/list array/i) + }) + + it('throws on blank line inside tabular array', () => { + const teon = 'items[2]{id}:\n 1\n\n 2' + expect(() => decode(teon)).toThrow(/blank line/i) + expect(() => decode(teon)).toThrow(/tabular array/i) + }) + + it('throws on multiple blank lines inside array', () => { + const teon = 'items[2]:\n - a\n\n\n - b' + expect(() => decode(teon)).toThrow(/blank line/i) + }) + + it('throws on blank line with spaces inside array', () => { + const teon = 'items[2]:\n - a\n \n - b' + expect(() => decode(teon)).toThrow(/blank line/i) + }) + + it('throws on blank line in nested list array', () => { + const teon = 'outer[2]:\n - inner[2]:\n - a\n\n - b\n - x' + expect(() => decode(teon)).toThrow(/blank line/i) + }) + }) + + describe('accepts blank lines outside arrays', () => { + it('accepts blank line between root-level fields', () => { + const teon = 'a: 1\n\nb: 2' + expect(decode(teon)).toEqual({ a: 1, b: 2 }) + }) + + it('accepts trailing newline at end of file', () => { + const teon = 'a: 1\n' + expect(decode(teon)).toEqual({ a: 1 }) + }) + + it('accepts multiple trailing newlines', () => { + const teon = 'a: 1\n\n\n' + expect(decode(teon)).toEqual({ a: 1 }) + }) + + it('accepts blank line after array ends', () => { + const teon = 'items[1]:\n - a\n\nb: 2' + expect(decode(teon)).toEqual({ items: ['a'], b: 2 }) + }) + + it('accepts blank line between nested object fields', () => { + const teon = 'a:\n b: 1\n\n c: 2' + expect(decode(teon)).toEqual({ a: { b: 1, c: 2 } }) + }) + }) + + describe('non-strict mode: ignores blank lines', () => { + it('ignores blank lines inside list array', () => { + const teon = 'items[3]:\n - a\n\n - b\n - c' + expect(decode(teon, { strict: false })).toEqual({ items: ['a', 'b', 'c'] }) + }) + + it('ignores blank lines inside tabular array', () => { + const teon = 'items[2]{id,name}:\n 1,Alice\n\n 2,Bob' + expect(decode(teon, { strict: false })).toEqual({ + items: [ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ], + }) + }) + + it('ignores multiple blank lines in arrays', () => { + const teon = 'items[2]:\n - a\n\n\n - b' + expect(decode(teon, { strict: false })).toEqual({ items: ['a', 'b'] }) + }) + }) +})