feat(decoder): blank line validation for strict mode in arrays and tabular rows

This commit is contained in:
Johann Schopplich
2025-10-29 14:51:53 +01:00
parent e6c006bc67
commit bd06e5b1ea
6 changed files with 202 additions and 14 deletions

View File

@@ -3,7 +3,7 @@ import type { LineCursor } from './scanner'
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants' import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
import { findClosingQuote } from '../shared/string-utils' import { findClosingQuote } from '../shared/string-utils'
import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser' import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation' import { assertExpectedCount, validateNoBlankLinesInRange, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
// #region Entry decoding // #region Entry decoding
@@ -179,6 +179,10 @@ function decodeListArray(
const items: JsonValue[] = [] const items: JsonValue[] = []
const itemDepth = baseDepth + 1 const itemDepth = baseDepth + 1
// Track line range for blank line validation
let startLine: number | undefined
let endLine: number | undefined
while (!cursor.atEnd() && items.length < header.length) { while (!cursor.atEnd() && items.length < header.length) {
const line = cursor.peek() const line = cursor.peek()
if (!line || line.depth < itemDepth) { if (!line || line.depth < itemDepth) {
@@ -186,8 +190,20 @@ function decodeListArray(
} }
if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) { if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) {
// Track first and last item line numbers
if (startLine === undefined) {
startLine = line.lineNumber
}
endLine = line.lineNumber
const item = decodeListItem(cursor, itemDepth, header.delimiter, options) const item = decodeListItem(cursor, itemDepth, header.delimiter, options)
items.push(item) items.push(item)
// Update endLine to the current cursor position (after item was decoded)
const currentLine = cursor.current()
if (currentLine) {
endLine = currentLine.lineNumber
}
} }
else { else {
break break
@@ -196,6 +212,17 @@ function decodeListArray(
assertExpectedCount(items.length, header.length, 'list array items', options) assertExpectedCount(items.length, header.length, 'list array items', options)
// In strict mode, check for blank lines inside the array
if (options.strict && startLine !== undefined && endLine !== undefined) {
validateNoBlankLinesInRange(
startLine, // From first item line
endLine, // To last item line
cursor.getBlankLines(),
options.strict,
'list array',
)
}
// In strict mode, check for extra items // In strict mode, check for extra items
if (options.strict) { if (options.strict) {
validateNoExtraListItems(cursor, itemDepth, header.length) validateNoExtraListItems(cursor, itemDepth, header.length)
@@ -213,6 +240,10 @@ function decodeTabularArray(
const objects: JsonObject[] = [] const objects: JsonObject[] = []
const rowDepth = baseDepth + 1 const rowDepth = baseDepth + 1
// Track line range for blank line validation
let startLine: number | undefined
let endLine: number | undefined
while (!cursor.atEnd() && objects.length < header.length) { while (!cursor.atEnd() && objects.length < header.length) {
const line = cursor.peek() const line = cursor.peek()
if (!line || line.depth < rowDepth) { if (!line || line.depth < rowDepth) {
@@ -220,6 +251,12 @@ function decodeTabularArray(
} }
if (line.depth === rowDepth) { if (line.depth === rowDepth) {
// Track first and last row line numbers
if (startLine === undefined) {
startLine = line.lineNumber
}
endLine = line.lineNumber
cursor.advance() cursor.advance()
const values = parseDelimitedValues(line.content, header.delimiter) const values = parseDelimitedValues(line.content, header.delimiter)
assertExpectedCount(values.length, header.fields!.length, 'tabular row values', options) assertExpectedCount(values.length, header.fields!.length, 'tabular row values', options)
@@ -240,6 +277,17 @@ function decodeTabularArray(
assertExpectedCount(objects.length, header.length, 'tabular rows', options) assertExpectedCount(objects.length, header.length, 'tabular rows', options)
// In strict mode, check for blank lines inside the array
if (options.strict && startLine !== undefined && endLine !== undefined) {
validateNoBlankLinesInRange(
startLine, // From first row line
endLine, // To last row line
cursor.getBlankLines(),
options.strict,
'tabular array',
)
}
// In strict mode, check for extra rows // In strict mode, check for extra rows
if (options.strict) { if (options.strict) {
validateNoExtraTabularRows(cursor, rowDepth, header) validateNoExtraTabularRows(cursor, rowDepth, header)

View File

@@ -1,13 +1,24 @@
import type { Depth, ParsedLine } from '../types' import type { BlankLineInfo, Depth, ParsedLine } from '../types'
import { SPACE, TAB } from '../constants' import { SPACE, TAB } from '../constants'
/**
 * Return shape of `toParsedLines`: the scanned content lines together with a
 * record of every blank line that was skipped while scanning.
 */
export interface ScanResult {
/** Lines that contain non-whitespace content, in source order. */
lines: ParsedLine[]
/** Empty or whitespace-only lines, each with its 1-based line number, indent and depth. */
blankLines: BlankLineInfo[]
}
export class LineCursor { export class LineCursor {
private lines: ParsedLine[] private lines: ParsedLine[]
private index: number private index: number
private blankLines: BlankLineInfo[]
constructor(lines: ParsedLine[]) { constructor(lines: ParsedLine[], blankLines: BlankLineInfo[] = []) {
this.lines = lines this.lines = lines
this.index = 0 this.index = 0
this.blankLines = blankLines
}
getBlankLines(): BlankLineInfo[] {
return this.blankLines
} }
peek(): ParsedLine | undefined { peek(): ParsedLine | undefined {
@@ -50,16 +61,18 @@ export class LineCursor {
} }
} }
export function toParsedLines(source: string, indentSize: number, strict: boolean): ParsedLine[] { export function toParsedLines(source: string, indentSize: number, strict: boolean): ScanResult {
if (!source.trim()) { if (!source.trim()) {
return [] return { lines: [], blankLines: [] }
} }
const lines = source.split('\n') const lines = source.split('\n')
const parsed: ParsedLine[] = [] const parsed: ParsedLine[] = []
const blankLines: BlankLineInfo[] = []
for (let i = 0; i < lines.length; i++) { for (let i = 0; i < lines.length; i++) {
const raw = lines[i]! const raw = lines[i]!
const lineNumber = i + 1
let indent = 0 let indent = 0
while (indent < raw.length && raw[indent] === SPACE) { while (indent < raw.length && raw[indent] === SPACE) {
indent++ indent++
@@ -67,8 +80,10 @@ export function toParsedLines(source: string, indentSize: number, strict: boolea
const content = raw.slice(indent) const content = raw.slice(indent)
// Skip empty lines or lines with only whitespace // Track blank lines
if (!content.trim()) { if (!content.trim()) {
const depth = computeDepthFromIndent(indent, indentSize)
blankLines.push({ lineNumber, indent, depth })
continue continue
} }
@@ -84,19 +99,19 @@ export function toParsedLines(source: string, indentSize: number, strict: boolea
// Check for tabs in leading whitespace (before actual content) // Check for tabs in leading whitespace (before actual content)
if (raw.slice(0, wsEnd).includes(TAB)) { if (raw.slice(0, wsEnd).includes(TAB)) {
throw new SyntaxError(`Line ${i + 1}: Tabs are not allowed in indentation in strict mode`) throw new SyntaxError(`Line ${lineNumber}: Tabs are not allowed in indentation in strict mode`)
} }
// Check for exact multiples of indentSize // Check for exact multiples of indentSize
if (indent > 0 && indent % indentSize !== 0) { if (indent > 0 && indent % indentSize !== 0) {
throw new SyntaxError(`Line ${i + 1}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`) throw new SyntaxError(`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
} }
} }
parsed.push({ raw, indent, content, depth }) parsed.push({ raw, indent, content, depth, lineNumber })
} }
return parsed return { lines: parsed, blankLines }
} }
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth { function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {

View File

@@ -1,4 +1,4 @@
import type { ArrayHeaderInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types' import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner' import type { LineCursor } from './scanner'
import { COLON, LIST_ITEM_PREFIX } from '../constants' import { COLON, LIST_ITEM_PREFIX } from '../constants'
@@ -71,6 +71,44 @@ export function validateNoExtraTabularRows(
} }
} }
/**
 * Rejects blank lines that sit strictly between two line numbers.
 *
 * @remarks
 * Used for strict-mode decoding, where blank lines between the items of a
 * list array or the rows of a tabular array are not allowed. Both bounds are
 * exclusive: a blank line whose number equals `startLine` or `endLine` is not
 * reported. The indentation and depth of a blank line are not considered.
 *
 * @param startLine Line number of the lower bound (exclusive)
 * @param endLine Line number of the upper bound (exclusive)
 * @param blankLines Blank line records to search, in the order they should be reported
 * @param strict When `false`, the function returns without checking anything
 * @param context Human-readable name of the enclosing construct, used in the error message
 * @throws SyntaxError naming the first matching blank line when `strict` is `true`
 */
export function validateNoBlankLinesInRange(
  startLine: number,
  endLine: number,
  blankLines: BlankLineInfo[],
  strict: boolean,
  context: string,
): void {
  if (strict) {
    // Depth is deliberately ignored: any blank line between two items is an
    // error, whatever its indentation level.
    const offending = blankLines.find(
      ({ lineNumber }) => startLine < lineNumber && lineNumber < endLine,
    )

    if (offending !== undefined) {
      throw new SyntaxError(
        `Line ${offending.lineNumber}: Blank lines inside ${context} are not allowed in strict mode`,
      )
    }
  }
}
/** /**
* Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array. * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
* *

View File

@@ -27,13 +27,13 @@ export function encode(input: unknown, options?: EncodeOptions): string {
export function decode(input: string, options?: DecodeOptions): JsonValue { export function decode(input: string, options?: DecodeOptions): JsonValue {
const resolvedOptions = resolveDecodeOptions(options) const resolvedOptions = resolveDecodeOptions(options)
const lines = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict) const scanResult = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
if (lines.length === 0) { if (scanResult.lines.length === 0) {
throw new TypeError('Cannot decode empty input: input must be a non-empty string') throw new TypeError('Cannot decode empty input: input must be a non-empty string')
} }
const cursor = new LineCursor(lines) const cursor = new LineCursor(scanResult.lines, scanResult.blankLines)
return decodeValueFromLines(cursor, resolvedOptions) return decodeValueFromLines(cursor, resolvedOptions)
} }

View File

@@ -70,6 +70,13 @@ export interface ParsedLine {
depth: Depth depth: Depth
indent: number indent: number
content: string content: string
lineNumber: number
}
export interface BlankLineInfo {
lineNumber: number
indent: number
depth: Depth
} }
// #endregion // #endregion

View File

@@ -602,3 +602,83 @@ describe('strict mode: indentation validation', () => {
}) })
}) })
}) })
// Test suite for blank-line handling in `decode`: blank lines inside list and
// tabular arrays must be rejected when decoding strictly, blank lines outside
// arrays must be accepted, and `{ strict: false }` must tolerate both.
// NOTE(review): the fixture strings below show single-space indentation; confirm
// they match the decoder's configured indent size.
describe('blank lines in arrays', () => {
// These cases call `decode` without options and expect an error, so they
// presumably rely on strict mode being the default — verify against
// `resolveDecodeOptions`.
describe('strict mode: errors on blank lines inside arrays', () => {
it('throws on blank line inside list array', () => {
const teon = 'items[3]:\n - a\n\n - b\n - c'
// The message must mention both the blank line and the array kind.
expect(() => decode(teon)).toThrow(/blank line/i)
expect(() => decode(teon)).toThrow(/list array/i)
})
it('throws on blank line inside tabular array', () => {
const teon = 'items[2]{id}:\n 1\n\n 2'
expect(() => decode(teon)).toThrow(/blank line/i)
expect(() => decode(teon)).toThrow(/tabular array/i)
})
it('throws on multiple blank lines inside array', () => {
const teon = 'items[2]:\n - a\n\n\n - b'
expect(() => decode(teon)).toThrow(/blank line/i)
})
it('throws on blank line with spaces inside array', () => {
// A whitespace-only line counts as blank, not just an empty one.
const teon = 'items[2]:\n - a\n \n - b'
expect(() => decode(teon)).toThrow(/blank line/i)
})
it('throws on blank line in nested list array', () => {
// The blank line sits between the items of the inner array.
const teon = 'outer[2]:\n - inner[2]:\n - a\n\n - b\n - x'
expect(() => decode(teon)).toThrow(/blank line/i)
})
})
// Blank lines that are not between array items must not trigger the validation.
describe('accepts blank lines outside arrays', () => {
it('accepts blank line between root-level fields', () => {
const teon = 'a: 1\n\nb: 2'
expect(decode(teon)).toEqual({ a: 1, b: 2 })
})
it('accepts trailing newline at end of file', () => {
const teon = 'a: 1\n'
expect(decode(teon)).toEqual({ a: 1 })
})
it('accepts multiple trailing newlines', () => {
const teon = 'a: 1\n\n\n'
expect(decode(teon)).toEqual({ a: 1 })
})
it('accepts blank line after array ends', () => {
// The blank line follows the last item, so it is outside the validated range.
const teon = 'items[1]:\n - a\n\nb: 2'
expect(decode(teon)).toEqual({ items: ['a'], b: 2 })
})
it('accepts blank line between nested object fields', () => {
const teon = 'a:\n b: 1\n\n c: 2'
expect(decode(teon)).toEqual({ a: { b: 1, c: 2 } })
})
})
// With `strict: false` the same inputs that throw above must decode normally.
describe('non-strict mode: ignores blank lines', () => {
it('ignores blank lines inside list array', () => {
const teon = 'items[3]:\n - a\n\n - b\n - c'
expect(decode(teon, { strict: false })).toEqual({ items: ['a', 'b', 'c'] })
})
it('ignores blank lines inside tabular array', () => {
const teon = 'items[2]{id,name}:\n 1,Alice\n\n 2,Bob'
expect(decode(teon, { strict: false })).toEqual({
items: [
{ id: 1, name: 'Alice' },
{ id: 2, name: 'Bob' },
],
})
})
it('ignores multiple blank lines in arrays', () => {
const teon = 'items[2]:\n - a\n\n\n - b'
expect(decode(teon, { strict: false })).toEqual({ items: ['a', 'b'] })
})
})
})