feat(decoder): blank line validation for strict mode in arrays and tabular rows

This commit is contained in:
Johann Schopplich
2025-10-29 14:51:53 +01:00
parent e6c006bc67
commit bd06e5b1ea
6 changed files with 202 additions and 14 deletions

View File

@@ -3,7 +3,7 @@ import type { LineCursor } from './scanner'
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
import { findClosingQuote } from '../shared/string-utils'
import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
import { assertExpectedCount, validateNoBlankLinesInRange, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
// #region Entry decoding
@@ -179,6 +179,10 @@ function decodeListArray(
const items: JsonValue[] = []
const itemDepth = baseDepth + 1
// Track line range for blank line validation
let startLine: number | undefined
let endLine: number | undefined
while (!cursor.atEnd() && items.length < header.length) {
const line = cursor.peek()
if (!line || line.depth < itemDepth) {
@@ -186,8 +190,20 @@ function decodeListArray(
}
if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) {
// Track first and last item line numbers
if (startLine === undefined) {
startLine = line.lineNumber
}
endLine = line.lineNumber
const item = decodeListItem(cursor, itemDepth, header.delimiter, options)
items.push(item)
// Update endLine to the current cursor position (after item was decoded)
const currentLine = cursor.current()
if (currentLine) {
endLine = currentLine.lineNumber
}
}
else {
break
@@ -196,6 +212,17 @@ function decodeListArray(
assertExpectedCount(items.length, header.length, 'list array items', options)
// In strict mode, check for blank lines inside the array
if (options.strict && startLine !== undefined && endLine !== undefined) {
validateNoBlankLinesInRange(
startLine, // From first item line
endLine, // To last item line
cursor.getBlankLines(),
options.strict,
'list array',
)
}
// In strict mode, check for extra items
if (options.strict) {
validateNoExtraListItems(cursor, itemDepth, header.length)
@@ -213,6 +240,10 @@ function decodeTabularArray(
const objects: JsonObject[] = []
const rowDepth = baseDepth + 1
// Track line range for blank line validation
let startLine: number | undefined
let endLine: number | undefined
while (!cursor.atEnd() && objects.length < header.length) {
const line = cursor.peek()
if (!line || line.depth < rowDepth) {
@@ -220,6 +251,12 @@ function decodeTabularArray(
}
if (line.depth === rowDepth) {
// Track first and last row line numbers
if (startLine === undefined) {
startLine = line.lineNumber
}
endLine = line.lineNumber
cursor.advance()
const values = parseDelimitedValues(line.content, header.delimiter)
assertExpectedCount(values.length, header.fields!.length, 'tabular row values', options)
@@ -240,6 +277,17 @@ function decodeTabularArray(
assertExpectedCount(objects.length, header.length, 'tabular rows', options)
// In strict mode, check for blank lines inside the array
if (options.strict && startLine !== undefined && endLine !== undefined) {
validateNoBlankLinesInRange(
startLine, // From first row line
endLine, // To last row line
cursor.getBlankLines(),
options.strict,
'tabular array',
)
}
// In strict mode, check for extra rows
if (options.strict) {
validateNoExtraTabularRows(cursor, rowDepth, header)

View File

@@ -1,13 +1,24 @@
import type { Depth, ParsedLine } from '../types'
import type { BlankLineInfo, Depth, ParsedLine } from '../types'
import { SPACE, TAB } from '../constants'
/**
 * Result returned by `toParsedLines`: the scanned input split into
 * content-bearing lines and the blank lines that were skipped over.
 */
export interface ScanResult {
/** Lines that contain non-whitespace content, in source order. */
lines: ParsedLine[]
/** Empty or whitespace-only lines, recorded separately instead of being parsed. */
blankLines: BlankLineInfo[]
}
export class LineCursor {
private lines: ParsedLine[]
private index: number
private blankLines: BlankLineInfo[]
constructor(lines: ParsedLine[]) {
constructor(lines: ParsedLine[], blankLines: BlankLineInfo[] = []) {
this.lines = lines
this.index = 0
this.blankLines = blankLines
}
getBlankLines(): BlankLineInfo[] {
return this.blankLines
}
peek(): ParsedLine | undefined {
@@ -50,16 +61,18 @@ export class LineCursor {
}
}
export function toParsedLines(source: string, indentSize: number, strict: boolean): ParsedLine[] {
export function toParsedLines(source: string, indentSize: number, strict: boolean): ScanResult {
if (!source.trim()) {
return []
return { lines: [], blankLines: [] }
}
const lines = source.split('\n')
const parsed: ParsedLine[] = []
const blankLines: BlankLineInfo[] = []
for (let i = 0; i < lines.length; i++) {
const raw = lines[i]!
const lineNumber = i + 1
let indent = 0
while (indent < raw.length && raw[indent] === SPACE) {
indent++
@@ -67,8 +80,10 @@ export function toParsedLines(source: string, indentSize: number, strict: boolea
const content = raw.slice(indent)
// Skip empty lines or lines with only whitespace
// Track blank lines
if (!content.trim()) {
const depth = computeDepthFromIndent(indent, indentSize)
blankLines.push({ lineNumber, indent, depth })
continue
}
@@ -84,19 +99,19 @@ export function toParsedLines(source: string, indentSize: number, strict: boolea
// Check for tabs in leading whitespace (before actual content)
if (raw.slice(0, wsEnd).includes(TAB)) {
throw new SyntaxError(`Line ${i + 1}: Tabs are not allowed in indentation in strict mode`)
throw new SyntaxError(`Line ${lineNumber}: Tabs are not allowed in indentation in strict mode`)
}
// Check for exact multiples of indentSize
if (indent > 0 && indent % indentSize !== 0) {
throw new SyntaxError(`Line ${i + 1}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
throw new SyntaxError(`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
}
}
parsed.push({ raw, indent, content, depth })
parsed.push({ raw, indent, content, depth, lineNumber })
}
return parsed
return { lines: parsed, blankLines }
}
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {

View File

@@ -1,4 +1,4 @@
import type { ArrayHeaderInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner'
import { COLON, LIST_ITEM_PREFIX } from '../constants'
@@ -71,6 +71,44 @@ export function validateNoExtraTabularRows(
}
}
/**
 * Rejects blank lines that fall strictly between two line numbers.
 *
 * @remarks
 * Used for strict-mode decoding, where blank lines inside arrays/tabular rows
 * are not allowed. Indentation depth of the blank line is intentionally
 * ignored: any blank line between the two boundary lines is an error.
 * The boundary lines themselves are never reported (both comparisons are
 * exclusive).
 *
 * @param startLine Line number that opens the range (exclusive bound)
 * @param endLine Line number that closes the range (exclusive bound)
 * @param blankLines Recorded blank lines to check against the range
 * @param strict Whether strict mode is enabled; when `false` nothing is checked
 * @param context Human-readable name of the construct, used in the error message
 * @throws SyntaxError for the first entry of `blankLines` that lies inside the range
 */
export function validateNoBlankLinesInRange(
  startLine: number,
  endLine: number,
  blankLines: BlankLineInfo[],
  strict: boolean,
  context: string,
): void {
  if (strict) {
    // Walk the blank lines in their given order and fail on the first hit
    for (const { lineNumber } of blankLines) {
      const liesInside = startLine < lineNumber && lineNumber < endLine
      if (liesInside) {
        throw new SyntaxError(
          `Line ${lineNumber}: Blank lines inside ${context} are not allowed in strict mode`,
        )
      }
    }
  }
}
/**
* Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
*

View File

@@ -27,13 +27,13 @@ export function encode(input: unknown, options?: EncodeOptions): string {
export function decode(input: string, options?: DecodeOptions): JsonValue {
const resolvedOptions = resolveDecodeOptions(options)
const lines = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
const scanResult = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
if (lines.length === 0) {
if (scanResult.lines.length === 0) {
throw new TypeError('Cannot decode empty input: input must be a non-empty string')
}
const cursor = new LineCursor(lines)
const cursor = new LineCursor(scanResult.lines, scanResult.blankLines)
return decodeValueFromLines(cursor, resolvedOptions)
}

View File

@@ -70,6 +70,13 @@ export interface ParsedLine {
depth: Depth
indent: number
content: string
lineNumber: number
}
/**
 * Position information for a blank (empty or whitespace-only) input line,
 * as recorded by the scanner for strict-mode blank line validation.
 */
export interface BlankLineInfo {
/** 1-based line number within the source text. */
lineNumber: number
/** Count of leading space characters on the line. */
indent: number
/** Nesting depth computed from `indent` and the configured indent size. */
depth: Depth
}
// #endregion