mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
feat(decoder): blank line validation for strict mode in arrays and tabular rows
This commit is contained in:
@@ -3,7 +3,7 @@ import type { LineCursor } from './scanner'
|
|||||||
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
|
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
|
||||||
import { findClosingQuote } from '../shared/string-utils'
|
import { findClosingQuote } from '../shared/string-utils'
|
||||||
import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
|
import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
|
||||||
import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
|
import { assertExpectedCount, validateNoBlankLinesInRange, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
|
||||||
|
|
||||||
// #region Entry decoding
|
// #region Entry decoding
|
||||||
|
|
||||||
@@ -179,6 +179,10 @@ function decodeListArray(
|
|||||||
const items: JsonValue[] = []
|
const items: JsonValue[] = []
|
||||||
const itemDepth = baseDepth + 1
|
const itemDepth = baseDepth + 1
|
||||||
|
|
||||||
|
// Track line range for blank line validation
|
||||||
|
let startLine: number | undefined
|
||||||
|
let endLine: number | undefined
|
||||||
|
|
||||||
while (!cursor.atEnd() && items.length < header.length) {
|
while (!cursor.atEnd() && items.length < header.length) {
|
||||||
const line = cursor.peek()
|
const line = cursor.peek()
|
||||||
if (!line || line.depth < itemDepth) {
|
if (!line || line.depth < itemDepth) {
|
||||||
@@ -186,8 +190,20 @@ function decodeListArray(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) {
|
if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||||
|
// Track first and last item line numbers
|
||||||
|
if (startLine === undefined) {
|
||||||
|
startLine = line.lineNumber
|
||||||
|
}
|
||||||
|
endLine = line.lineNumber
|
||||||
|
|
||||||
const item = decodeListItem(cursor, itemDepth, header.delimiter, options)
|
const item = decodeListItem(cursor, itemDepth, header.delimiter, options)
|
||||||
items.push(item)
|
items.push(item)
|
||||||
|
|
||||||
|
// Update endLine to the current cursor position (after item was decoded)
|
||||||
|
const currentLine = cursor.current()
|
||||||
|
if (currentLine) {
|
||||||
|
endLine = currentLine.lineNumber
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
break
|
break
|
||||||
@@ -196,6 +212,17 @@ function decodeListArray(
|
|||||||
|
|
||||||
assertExpectedCount(items.length, header.length, 'list array items', options)
|
assertExpectedCount(items.length, header.length, 'list array items', options)
|
||||||
|
|
||||||
|
// In strict mode, check for blank lines inside the array
|
||||||
|
if (options.strict && startLine !== undefined && endLine !== undefined) {
|
||||||
|
validateNoBlankLinesInRange(
|
||||||
|
startLine, // From first item line
|
||||||
|
endLine, // To last item line
|
||||||
|
cursor.getBlankLines(),
|
||||||
|
options.strict,
|
||||||
|
'list array',
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// In strict mode, check for extra items
|
// In strict mode, check for extra items
|
||||||
if (options.strict) {
|
if (options.strict) {
|
||||||
validateNoExtraListItems(cursor, itemDepth, header.length)
|
validateNoExtraListItems(cursor, itemDepth, header.length)
|
||||||
@@ -213,6 +240,10 @@ function decodeTabularArray(
|
|||||||
const objects: JsonObject[] = []
|
const objects: JsonObject[] = []
|
||||||
const rowDepth = baseDepth + 1
|
const rowDepth = baseDepth + 1
|
||||||
|
|
||||||
|
// Track line range for blank line validation
|
||||||
|
let startLine: number | undefined
|
||||||
|
let endLine: number | undefined
|
||||||
|
|
||||||
while (!cursor.atEnd() && objects.length < header.length) {
|
while (!cursor.atEnd() && objects.length < header.length) {
|
||||||
const line = cursor.peek()
|
const line = cursor.peek()
|
||||||
if (!line || line.depth < rowDepth) {
|
if (!line || line.depth < rowDepth) {
|
||||||
@@ -220,6 +251,12 @@ function decodeTabularArray(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (line.depth === rowDepth) {
|
if (line.depth === rowDepth) {
|
||||||
|
// Track first and last row line numbers
|
||||||
|
if (startLine === undefined) {
|
||||||
|
startLine = line.lineNumber
|
||||||
|
}
|
||||||
|
endLine = line.lineNumber
|
||||||
|
|
||||||
cursor.advance()
|
cursor.advance()
|
||||||
const values = parseDelimitedValues(line.content, header.delimiter)
|
const values = parseDelimitedValues(line.content, header.delimiter)
|
||||||
assertExpectedCount(values.length, header.fields!.length, 'tabular row values', options)
|
assertExpectedCount(values.length, header.fields!.length, 'tabular row values', options)
|
||||||
@@ -240,6 +277,17 @@ function decodeTabularArray(
|
|||||||
|
|
||||||
assertExpectedCount(objects.length, header.length, 'tabular rows', options)
|
assertExpectedCount(objects.length, header.length, 'tabular rows', options)
|
||||||
|
|
||||||
|
// In strict mode, check for blank lines inside the array
|
||||||
|
if (options.strict && startLine !== undefined && endLine !== undefined) {
|
||||||
|
validateNoBlankLinesInRange(
|
||||||
|
startLine, // From first row line
|
||||||
|
endLine, // To last row line
|
||||||
|
cursor.getBlankLines(),
|
||||||
|
options.strict,
|
||||||
|
'tabular array',
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// In strict mode, check for extra rows
|
// In strict mode, check for extra rows
|
||||||
if (options.strict) {
|
if (options.strict) {
|
||||||
validateNoExtraTabularRows(cursor, rowDepth, header)
|
validateNoExtraTabularRows(cursor, rowDepth, header)
|
||||||
|
|||||||
@@ -1,13 +1,24 @@
|
|||||||
import type { Depth, ParsedLine } from '../types'
|
import type { BlankLineInfo, Depth, ParsedLine } from '../types'
|
||||||
import { SPACE, TAB } from '../constants'
|
import { SPACE, TAB } from '../constants'
|
||||||
|
|
||||||
|
export interface ScanResult {
|
||||||
|
lines: ParsedLine[]
|
||||||
|
blankLines: BlankLineInfo[]
|
||||||
|
}
|
||||||
|
|
||||||
export class LineCursor {
|
export class LineCursor {
|
||||||
private lines: ParsedLine[]
|
private lines: ParsedLine[]
|
||||||
private index: number
|
private index: number
|
||||||
|
private blankLines: BlankLineInfo[]
|
||||||
|
|
||||||
constructor(lines: ParsedLine[]) {
|
constructor(lines: ParsedLine[], blankLines: BlankLineInfo[] = []) {
|
||||||
this.lines = lines
|
this.lines = lines
|
||||||
this.index = 0
|
this.index = 0
|
||||||
|
this.blankLines = blankLines
|
||||||
|
}
|
||||||
|
|
||||||
|
getBlankLines(): BlankLineInfo[] {
|
||||||
|
return this.blankLines
|
||||||
}
|
}
|
||||||
|
|
||||||
peek(): ParsedLine | undefined {
|
peek(): ParsedLine | undefined {
|
||||||
@@ -50,16 +61,18 @@ export class LineCursor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function toParsedLines(source: string, indentSize: number, strict: boolean): ParsedLine[] {
|
export function toParsedLines(source: string, indentSize: number, strict: boolean): ScanResult {
|
||||||
if (!source.trim()) {
|
if (!source.trim()) {
|
||||||
return []
|
return { lines: [], blankLines: [] }
|
||||||
}
|
}
|
||||||
|
|
||||||
const lines = source.split('\n')
|
const lines = source.split('\n')
|
||||||
const parsed: ParsedLine[] = []
|
const parsed: ParsedLine[] = []
|
||||||
|
const blankLines: BlankLineInfo[] = []
|
||||||
|
|
||||||
for (let i = 0; i < lines.length; i++) {
|
for (let i = 0; i < lines.length; i++) {
|
||||||
const raw = lines[i]!
|
const raw = lines[i]!
|
||||||
|
const lineNumber = i + 1
|
||||||
let indent = 0
|
let indent = 0
|
||||||
while (indent < raw.length && raw[indent] === SPACE) {
|
while (indent < raw.length && raw[indent] === SPACE) {
|
||||||
indent++
|
indent++
|
||||||
@@ -67,8 +80,10 @@ export function toParsedLines(source: string, indentSize: number, strict: boolea
|
|||||||
|
|
||||||
const content = raw.slice(indent)
|
const content = raw.slice(indent)
|
||||||
|
|
||||||
// Skip empty lines or lines with only whitespace
|
// Track blank lines
|
||||||
if (!content.trim()) {
|
if (!content.trim()) {
|
||||||
|
const depth = computeDepthFromIndent(indent, indentSize)
|
||||||
|
blankLines.push({ lineNumber, indent, depth })
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,19 +99,19 @@ export function toParsedLines(source: string, indentSize: number, strict: boolea
|
|||||||
|
|
||||||
// Check for tabs in leading whitespace (before actual content)
|
// Check for tabs in leading whitespace (before actual content)
|
||||||
if (raw.slice(0, wsEnd).includes(TAB)) {
|
if (raw.slice(0, wsEnd).includes(TAB)) {
|
||||||
throw new SyntaxError(`Line ${i + 1}: Tabs are not allowed in indentation in strict mode`)
|
throw new SyntaxError(`Line ${lineNumber}: Tabs are not allowed in indentation in strict mode`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for exact multiples of indentSize
|
// Check for exact multiples of indentSize
|
||||||
if (indent > 0 && indent % indentSize !== 0) {
|
if (indent > 0 && indent % indentSize !== 0) {
|
||||||
throw new SyntaxError(`Line ${i + 1}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
|
throw new SyntaxError(`Line ${lineNumber}: Indentation must be exact multiple of ${indentSize}, but found ${indent} spaces`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
parsed.push({ raw, indent, content, depth })
|
parsed.push({ raw, indent, content, depth, lineNumber })
|
||||||
}
|
}
|
||||||
|
|
||||||
return parsed
|
return { lines: parsed, blankLines }
|
||||||
}
|
}
|
||||||
|
|
||||||
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
|
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import type { ArrayHeaderInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
|
import type { ArrayHeaderInfo, BlankLineInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
|
||||||
import type { LineCursor } from './scanner'
|
import type { LineCursor } from './scanner'
|
||||||
import { COLON, LIST_ITEM_PREFIX } from '../constants'
|
import { COLON, LIST_ITEM_PREFIX } from '../constants'
|
||||||
|
|
||||||
@@ -71,6 +71,44 @@ export function validateNoExtraTabularRows(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rejects blank lines that fall strictly between two line numbers.
 *
 * @remarks
 * In strict mode, blank lines inside arrays/tabular rows are not allowed.
 * Both bounds are exclusive: a blank line is reported only when its line number
 * is greater than `startLine` and less than `endLine`. Blank lines are matched
 * by line number alone; their `indent` and `depth` are not consulted, so a blank
 * line at any indentation level inside the range is an error.
 *
 * @param startLine Line number immediately before the checked range (exclusive)
 * @param endLine Line number immediately after the checked range (exclusive)
 * @param blankLines Blank line records to search; the first match in array order is reported
 * @param strict Whether strict mode is enabled; when false nothing is checked
 * @param context Description of the context (e.g., "list array", "tabular array"), used in the error message
 * @throws SyntaxError if a blank line lies strictly between `startLine` and `endLine` in strict mode
 */
export function validateNoBlankLinesInRange(
  startLine: number,
  endLine: number,
  blankLines: BlankLineInfo[],
  strict: boolean,
  context: string,
): void {
  if (!strict)
    return

  // Only the first offending blank line is reported, so stop at the first match
  // instead of collecting every blank line in the range.
  const firstBlank = blankLines.find(
    blank => blank.lineNumber > startLine && blank.lineNumber < endLine,
  )

  if (firstBlank !== undefined) {
    throw new SyntaxError(
      `Line ${firstBlank.lineNumber}: Blank lines inside ${context} are not allowed in strict mode`,
    )
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
|
* Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -27,13 +27,13 @@ export function encode(input: unknown, options?: EncodeOptions): string {
|
|||||||
|
|
||||||
export function decode(input: string, options?: DecodeOptions): JsonValue {
|
export function decode(input: string, options?: DecodeOptions): JsonValue {
|
||||||
const resolvedOptions = resolveDecodeOptions(options)
|
const resolvedOptions = resolveDecodeOptions(options)
|
||||||
const lines = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
|
const scanResult = toParsedLines(input, resolvedOptions.indent, resolvedOptions.strict)
|
||||||
|
|
||||||
if (lines.length === 0) {
|
if (scanResult.lines.length === 0) {
|
||||||
throw new TypeError('Cannot decode empty input: input must be a non-empty string')
|
throw new TypeError('Cannot decode empty input: input must be a non-empty string')
|
||||||
}
|
}
|
||||||
|
|
||||||
const cursor = new LineCursor(lines)
|
const cursor = new LineCursor(scanResult.lines, scanResult.blankLines)
|
||||||
return decodeValueFromLines(cursor, resolvedOptions)
|
return decodeValueFromLines(cursor, resolvedOptions)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -70,6 +70,13 @@ export interface ParsedLine {
|
|||||||
depth: Depth
|
depth: Depth
|
||||||
indent: number
|
indent: number
|
||||||
content: string
|
content: string
|
||||||
|
lineNumber: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface BlankLineInfo {
|
||||||
|
lineNumber: number
|
||||||
|
indent: number
|
||||||
|
depth: Depth
|
||||||
}
|
}
|
||||||
|
|
||||||
// #endregion
|
// #endregion
|
||||||
|
|||||||
@@ -602,3 +602,83 @@ describe('strict mode: indentation validation', () => {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('blank lines in arrays', () => {
|
||||||
|
describe('strict mode: errors on blank lines inside arrays', () => {
|
||||||
|
it('throws on blank line inside list array', () => {
|
||||||
|
const teon = 'items[3]:\n - a\n\n - b\n - c'
|
||||||
|
expect(() => decode(teon)).toThrow(/blank line/i)
|
||||||
|
expect(() => decode(teon)).toThrow(/list array/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('throws on blank line inside tabular array', () => {
|
||||||
|
const teon = 'items[2]{id}:\n 1\n\n 2'
|
||||||
|
expect(() => decode(teon)).toThrow(/blank line/i)
|
||||||
|
expect(() => decode(teon)).toThrow(/tabular array/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('throws on multiple blank lines inside array', () => {
|
||||||
|
const teon = 'items[2]:\n - a\n\n\n - b'
|
||||||
|
expect(() => decode(teon)).toThrow(/blank line/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('throws on blank line with spaces inside array', () => {
|
||||||
|
const teon = 'items[2]:\n - a\n \n - b'
|
||||||
|
expect(() => decode(teon)).toThrow(/blank line/i)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('throws on blank line in nested list array', () => {
|
||||||
|
const teon = 'outer[2]:\n - inner[2]:\n - a\n\n - b\n - x'
|
||||||
|
expect(() => decode(teon)).toThrow(/blank line/i)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('accepts blank lines outside arrays', () => {
|
||||||
|
it('accepts blank line between root-level fields', () => {
|
||||||
|
const teon = 'a: 1\n\nb: 2'
|
||||||
|
expect(decode(teon)).toEqual({ a: 1, b: 2 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts trailing newline at end of file', () => {
|
||||||
|
const teon = 'a: 1\n'
|
||||||
|
expect(decode(teon)).toEqual({ a: 1 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts multiple trailing newlines', () => {
|
||||||
|
const teon = 'a: 1\n\n\n'
|
||||||
|
expect(decode(teon)).toEqual({ a: 1 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts blank line after array ends', () => {
|
||||||
|
const teon = 'items[1]:\n - a\n\nb: 2'
|
||||||
|
expect(decode(teon)).toEqual({ items: ['a'], b: 2 })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('accepts blank line between nested object fields', () => {
|
||||||
|
const teon = 'a:\n b: 1\n\n c: 2'
|
||||||
|
expect(decode(teon)).toEqual({ a: { b: 1, c: 2 } })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('non-strict mode: ignores blank lines', () => {
|
||||||
|
it('ignores blank lines inside list array', () => {
|
||||||
|
const teon = 'items[3]:\n - a\n\n - b\n - c'
|
||||||
|
expect(decode(teon, { strict: false })).toEqual({ items: ['a', 'b', 'c'] })
|
||||||
|
})
|
||||||
|
|
||||||
|
it('ignores blank lines inside tabular array', () => {
|
||||||
|
const teon = 'items[2]{id,name}:\n 1,Alice\n\n 2,Bob'
|
||||||
|
expect(decode(teon, { strict: false })).toEqual({
|
||||||
|
items: [
|
||||||
|
{ id: 1, name: 'Alice' },
|
||||||
|
{ id: 2, name: 'Bob' },
|
||||||
|
],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('ignores multiple blank lines in arrays', () => {
|
||||||
|
const teon = 'items[2]:\n - a\n\n\n - b'
|
||||||
|
expect(decode(teon, { strict: false })).toEqual({ items: ['a', 'b'] })
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user