refactor: decoding source files

This commit is contained in:
Johann Schopplich
2025-10-29 10:12:13 +01:00
parent c4f00bd69f
commit 8d238f8eeb
6 changed files with 268 additions and 124 deletions

View File

@@ -1,6 +1,6 @@
import type { LineCursor } from './scanner'
import type { import type {
ArrayHeaderInfo, ArrayHeaderInfo,
Delimiter,
Depth, Depth,
JsonArray, JsonArray,
JsonObject, JsonObject,
@@ -8,12 +8,13 @@ import type {
JsonValue, JsonValue,
ParsedLine, ParsedLine,
ResolvedDecodeOptions, ResolvedDecodeOptions,
} from './types' } from '../types'
import type { LineCursor } from './scanner'
import { import {
COLON, COLON,
DEFAULT_DELIMITER, DEFAULT_DELIMITER,
LIST_ITEM_PREFIX, LIST_ITEM_PREFIX,
} from './constants' } from '../constants'
import { import {
isArrayHeaderAfterHyphen, isArrayHeaderAfterHyphen,
isObjectFirstFieldAfterHyphen, isObjectFirstFieldAfterHyphen,
@@ -23,6 +24,12 @@ import {
parseKeyToken, parseKeyToken,
parsePrimitiveToken, parsePrimitiveToken,
} from './parser' } from './parser'
import { findClosingQuote } from './string-utils'
import {
assertExpectedCount,
validateNoExtraListItems,
validateNoExtraTabularRows,
} from './validation'
// #region Entry decoding // #region Entry decoding
@@ -33,7 +40,7 @@ export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecode
} }
// Check for root array // Check for root array
if (isRootArrayHeaderLine(first)) { if (isArrayHeaderAfterHyphen(first.content)) {
const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER) const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER)
if (headerInfo) { if (headerInfo) {
cursor.advance() // Move past the header line cursor.advance() // Move past the header line
@@ -50,29 +57,18 @@ export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecode
return decodeObject(cursor, 0, options) return decodeObject(cursor, 0, options)
} }
function isRootArrayHeaderLine(line: ParsedLine): boolean {
return isArrayHeaderAfterHyphen(line.content)
}
function isKeyValueLine(line: ParsedLine): boolean { function isKeyValueLine(line: ParsedLine): boolean {
const content = line.content const content = line.content
// Look for unquoted colon or quoted key followed by colon // Look for unquoted colon or quoted key followed by colon
if (content.startsWith('"')) { if (content.startsWith('"')) {
// Quoted key // Quoted key - find the closing quote
let i = 1 const closingQuoteIndex = findClosingQuote(content, 0)
while (i < content.length) { if (closingQuoteIndex === -1) {
if (content[i] === '\\' && i + 1 < content.length) {
i += 2
continue
}
if (content[i] === '"') {
// Found end of quoted key, check for colon
return content[i + 1] === COLON
}
i++
}
return false return false
} }
// Check if there's a colon after the quoted key
return closingQuoteIndex + 1 < content.length && content[closingQuoteIndex + 1] === COLON
}
else { else {
// Unquoted key - look for first colon not inside quotes // Unquoted key - look for first colon not inside quotes
return content.includes(COLON) return content.includes(COLON)
@@ -227,11 +223,8 @@ function decodeListArray(
assertExpectedCount(items.length, header.length, 'list array items', options) assertExpectedCount(items.length, header.length, 'list array items', options)
// In strict mode, check for extra items // In strict mode, check for extra items
if (options.strict && !cursor.atEnd()) { if (options.strict) {
const nextLine = cursor.peek() validateNoExtraListItems(cursor, itemDepth, header.length)
if (nextLine && nextLine.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
throw new RangeError(`Expected ${header.length} list array items, but found more`)
}
} }
return items return items
@@ -274,30 +267,8 @@ function decodeTabularArray(
assertExpectedCount(objects.length, header.length, 'tabular rows', options) assertExpectedCount(objects.length, header.length, 'tabular rows', options)
// In strict mode, check for extra rows // In strict mode, check for extra rows
if (options.strict && !cursor.atEnd()) { if (options.strict) {
const nextLine = cursor.peek() validateNoExtraTabularRows(cursor, rowDepth, header)
if (nextLine && nextLine.depth === rowDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
// A key-value pair has a colon (and if it has delimiter, colon comes first)
// A data row either has no colon, or has delimiter before colon
const hasColon = nextLine.content.includes(COLON)
const hasDelimiter = nextLine.content.includes(header.delimiter)
if (!hasColon) {
// No colon = data row (for single-field tables)
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
}
else if (hasDelimiter) {
// Has both colon and delimiter - check which comes first
const colonPos = nextLine.content.indexOf(COLON)
const delimiterPos = nextLine.content.indexOf(header.delimiter)
if (delimiterPos < colonPos) {
// Delimiter before colon = data row
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
}
// Colon before delimiter = key-value pair, OK
}
// Has colon but no delimiter = key-value pair, OK
}
} }
return objects return objects
@@ -310,7 +281,7 @@ function decodeTabularArray(
function decodeListItem( function decodeListItem(
cursor: LineCursor, cursor: LineCursor,
baseDepth: Depth, baseDepth: Depth,
activeDelimiter: string, activeDelimiter: Delimiter,
options: ResolvedDecodeOptions, options: ResolvedDecodeOptions,
): JsonValue { ): JsonValue {
const line = cursor.next() const line = cursor.next()
@@ -322,7 +293,7 @@ function decodeListItem(
// Check for array header after hyphen // Check for array header after hyphen
if (isArrayHeaderAfterHyphen(afterHyphen)) { if (isArrayHeaderAfterHyphen(afterHyphen)) {
const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter as any) const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter)
if (arrayHeader) { if (arrayHeader) {
return decodeArrayFromHeader(arrayHeader.header, arrayHeader.inlineValues, cursor, baseDepth, options) return decodeArrayFromHeader(arrayHeader.header, arrayHeader.inlineValues, cursor, baseDepth, options)
} }
@@ -344,7 +315,7 @@ function decodeObjectFromListItem(
options: ResolvedDecodeOptions, options: ResolvedDecodeOptions,
): JsonObject { ): JsonObject {
const afterHyphen = firstLine.content.slice(LIST_ITEM_PREFIX.length) const afterHyphen = firstLine.content.slice(LIST_ITEM_PREFIX.length)
const { key, value, followDepth } = decodeFirstFieldOnHyphen(afterHyphen, cursor, baseDepth, options) const { key, value, followDepth } = decodeKeyValue(afterHyphen, cursor, baseDepth, options)
const obj: JsonObject = { [key]: value } const obj: JsonObject = { [key]: value }
@@ -367,23 +338,4 @@ function decodeObjectFromListItem(
return obj return obj
} }
function decodeFirstFieldOnHyphen(
rest: string,
cursor: LineCursor,
baseDepth: Depth,
options: ResolvedDecodeOptions,
): { key: string, value: JsonValue, followDepth: Depth } {
return decodeKeyValue(rest, cursor, baseDepth, options)
}
// #endregion
// #region Validation
function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void {
if (options.strict && actual !== expected) {
throw new RangeError(`Expected ${expected} ${what}, but got ${actual}`)
}
}
// #endregion // #endregion

View File

@@ -2,7 +2,7 @@ import type {
ArrayHeaderInfo, ArrayHeaderInfo,
Delimiter, Delimiter,
JsonPrimitive, JsonPrimitive,
} from './types' } from '../types'
import { import {
BACKSLASH, BACKSLASH,
CARRIAGE_RETURN, CARRIAGE_RETURN,
@@ -20,7 +20,8 @@ import {
PIPE, PIPE,
TAB, TAB,
TRUE_LITERAL, TRUE_LITERAL,
} from './constants' } from '../constants'
import { findClosingQuote, hasUnquotedChar } from './string-utils'
// #region Array header parsing // #region Array header parsing
@@ -246,26 +247,19 @@ export function parseStringLiteral(token: string): string {
if (trimmed.startsWith(DOUBLE_QUOTE)) { if (trimmed.startsWith(DOUBLE_QUOTE)) {
// Find the closing quote, accounting for escaped quotes // Find the closing quote, accounting for escaped quotes
let i = 1 const closingQuoteIndex = findClosingQuote(trimmed, 0)
while (i < trimmed.length) {
if (trimmed[i] === BACKSLASH && i + 1 < trimmed.length) { if (closingQuoteIndex === -1) {
// Skip escaped character // No closing quote was found
i += 2 throw new SyntaxError('Unterminated string: missing closing quote')
continue
}
if (trimmed[i] === DOUBLE_QUOTE) {
// Found closing quote
if (i !== trimmed.length - 1) {
throw new SyntaxError('Unexpected characters after closing quote')
}
const content = trimmed.slice(1, i)
return unescapeString(content)
}
i++
} }
// If we get here, no closing quote was found if (closingQuoteIndex !== trimmed.length - 1) {
throw new SyntaxError('Unterminated string: missing closing quote') throw new SyntaxError('Unexpected characters after closing quote')
}
const content = trimmed.slice(1, closingQuoteIndex)
return unescapeString(content)
} }
return trimmed return trimmed
@@ -338,20 +332,17 @@ export function parseUnquotedKey(content: string, start: number): { key: string,
} }
export function parseQuotedKey(content: string, start: number): { key: string, end: number } { export function parseQuotedKey(content: string, start: number): { key: string, end: number } {
let i = start + 1 // Skip opening quote // Find the closing quote, accounting for escaped quotes
let keyContent = '' const closingQuoteIndex = findClosingQuote(content, start)
while (i < content.length) { if (closingQuoteIndex === -1) {
if (content[i] === BACKSLASH && i + 1 < content.length) { throw new SyntaxError('Unterminated quoted key')
keyContent += content[i]! + content[i + 1]
i += 2
continue
} }
if (content[i] === DOUBLE_QUOTE) { // Extract and unescape the key content
// Found closing quote const keyContent = content.slice(start + 1, closingQuoteIndex)
const key = unescapeString(keyContent) const key = unescapeString(keyContent)
let end = i + 1 let end = closingQuoteIndex + 1
// Validate and skip colon after quoted key // Validate and skip colon after quoted key
if (end >= content.length || content[end] !== COLON) { if (end >= content.length || content[end] !== COLON) {
@@ -362,13 +353,6 @@ export function parseQuotedKey(content: string, start: number): { key: string, e
return { key, end } return { key, end }
} }
keyContent += content[i]
i++
}
throw new SyntaxError('Unterminated quoted key')
}
export function parseKeyToken(content: string, start: number): { key: string, end: number } { export function parseKeyToken(content: string, start: number): { key: string, end: number } {
if (content[start] === DOUBLE_QUOTE) { if (content[start] === DOUBLE_QUOTE) {
return parseQuotedKey(content, start) return parseQuotedKey(content, start)
@@ -383,11 +367,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
// #region Array content detection helpers // #region Array content detection helpers
export function isArrayHeaderAfterHyphen(content: string): boolean { export function isArrayHeaderAfterHyphen(content: string): boolean {
return content.trim().startsWith(OPEN_BRACKET) && content.includes(COLON) return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON)
} }
export function isObjectFirstFieldAfterHyphen(content: string): boolean { export function isObjectFirstFieldAfterHyphen(content: string): boolean {
return content.includes(COLON) return hasUnquotedChar(content, COLON)
} }
// #endregion // #endregion

View File

@@ -1,5 +1,5 @@
import type { Depth, ParsedLine } from './types' import type { Depth, ParsedLine } from '../types'
import { SPACE } from './constants' import { SPACE } from '../constants'
export class LineCursor { export class LineCursor {
private lines: ParsedLine[] private lines: ParsedLine[]
@@ -33,6 +33,21 @@ export class LineCursor {
get length(): number { get length(): number {
return this.lines.length return this.lines.length
} }
/**
 * Looks at the line returned by `this.peek()` and hands it back only when it
 * sits exactly at the requested depth.
 *
 * @param targetDepth The depth the upcoming line must have
 * @returns The upcoming line when its depth equals `targetDepth`; `undefined`
 *   when there is no upcoming line or it is shallower or deeper
 */
peekAtDepth(targetDepth: Depth): ParsedLine | undefined {
  const upcoming = this.peek()
  // A shallower or deeper line is treated the same as no line at all
  if (upcoming && upcoming.depth === targetDepth) {
    return upcoming
  }
  return undefined
}
/**
 * Reports whether `peekAtDepth` yields a line for the given depth.
 *
 * @param targetDepth The depth to probe
 * @returns `true` when `peekAtDepth(targetDepth)` returns a line, `false` otherwise
 */
hasMoreAtDepth(targetDepth: Depth): boolean {
  const upcoming = this.peekAtDepth(targetDepth)
  return upcoming !== undefined
}
} }
export function toParsedLines(source: string, indentSize: number): ParsedLine[] { export function toParsedLines(source: string, indentSize: number): ParsedLine[] {

View File

@@ -0,0 +1,96 @@
import { BACKSLASH, DOUBLE_QUOTE } from '../constants'
/**
 * Locates the double quote that terminates a quoted section.
 *
 * Scanning begins one character after `start`. A backslash that is followed by
 * at least one more character consumes that character too, so an escaped quote
 * never counts as the terminator.
 *
 * @param content The string to search in
 * @param start The index of the opening quote
 * @returns The index of the closing quote, or -1 if not found
 */
export function findClosingQuote(content: string, start: number): number {
  for (let pos = start + 1; pos < content.length; pos++) {
    const current = content[pos]

    if (current === BACKSLASH && pos + 1 < content.length) {
      // Step onto the escaped character; the loop increment then moves past it
      pos++
    }
    else if (current === DOUBLE_QUOTE) {
      return pos
    }
  }

  // Ran off the end without meeting an unescaped quote
  return -1
}
/**
 * Tells whether `char` occurs anywhere in `content` outside of quoted sections.
 *
 * Delegates the scan to `findUnquotedChar` and only reports whether it found a match.
 *
 * @param content The string to check
 * @param char The character to look for
 * @returns true if the character exists outside quotes, false otherwise
 */
export function hasUnquotedChar(content: string, char: string): boolean {
  const position = findUnquotedChar(content, char)
  return position !== -1
}
/**
 * Returns the position of the first `char` that is not enclosed in double quotes.
 *
 * Every double quote encountered toggles the "inside quotes" state. While inside
 * quotes, a backslash followed by another character skips that character, so an
 * escaped quote does not end the quoted section. Outside quotes a backslash is
 * an ordinary character.
 *
 * @param content The string to search in
 * @param char The character to look for
 * @param start Optional starting index (defaults to 0)
 * @returns The index of the character, or -1 if not found outside quotes
 */
export function findUnquotedChar(content: string, char: string, start = 0): number {
  let insideQuotes = false

  for (let pos = start; pos < content.length; pos++) {
    const current = content[pos]

    if (insideQuotes && current === BACKSLASH && pos + 1 < content.length) {
      // Step onto the escaped character; the loop increment then moves past it
      pos++
      continue
    }

    if (current === DOUBLE_QUOTE) {
      insideQuotes = !insideQuotes
      continue
    }

    if (!insideQuotes && current === char) {
      return pos
    }
  }

  return -1
}
/**
 * Reports whether a string, ignoring surrounding whitespace, is wrapped in double quotes.
 *
 * Only the first and last characters of the trimmed text are inspected; a lone
 * quote character does not qualify because at least two characters are required.
 *
 * @param content The string to check
 * @returns true if the string is quoted, false otherwise
 */
export function isQuotedString(content: string): boolean {
  const candidate = content.trim()

  // Fewer than two characters cannot hold both an opening and a closing quote
  if (candidate.length < 2) {
    return false
  }

  return candidate.startsWith(DOUBLE_QUOTE) && candidate.endsWith(DOUBLE_QUOTE)
}
/**
 * Advances past consecutive whitespace characters beginning at `start`.
 *
 * @param content The string to process
 * @param start The starting index
 * @returns The index of the first non-whitespace character at or after `start`,
 *   or `content.length` if only whitespace remains; `start` itself is returned
 *   unchanged when it already lies at or beyond the end of the string
 */
export function skipWhitespace(content: string, start: number): number {
  let position = start

  for (; position < content.length; position++) {
    if (!/\s/.test(content[position]!)) {
      break
    }
  }

  return position
}

97
src/decode/validation.ts Normal file
View File

@@ -0,0 +1,97 @@
import type { ArrayHeaderInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner'
import { COLON, LIST_ITEM_PREFIX } from '../constants'
/**
 * Enforces, in strict mode only, that a decoded collection has the declared size.
 *
 * When `options.strict` is falsy the counts are not compared at all.
 *
 * @param actual The actual count
 * @param expected The expected count
 * @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows')
 * @param options Decode options
 * @throws RangeError if counts don't match in strict mode
 */
export function assertExpectedCount(
  actual: number,
  expected: number,
  itemType: string,
  options: ResolvedDecodeOptions,
): void {
  // Nothing to report when validation is off or the counts agree
  if (!options.strict || actual === expected) {
    return
  }

  throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`)
}
/**
 * Rejects a list array that is followed by one more list item at the same depth.
 *
 * Only the single upcoming line is examined: it counts as an extra item when
 * its depth equals `itemDepth` and its content begins with the list item prefix.
 * Nothing is checked when the cursor is already at the end.
 *
 * @param cursor The line cursor
 * @param itemDepth The expected depth of items
 * @param expectedCount The expected number of items
 * @throws RangeError if extra items are found
 */
export function validateNoExtraListItems(
  cursor: LineCursor,
  itemDepth: Depth,
  expectedCount: number,
): void {
  // Only peek when the cursor still has lines left
  const upcoming = cursor.atEnd() ? undefined : cursor.peek()
  if (!upcoming) {
    return
  }

  const isExtraItem = upcoming.depth === itemDepth && upcoming.content.startsWith(LIST_ITEM_PREFIX)
  if (isExtraItem) {
    throw new RangeError(`Expected ${expectedCount} list array items, but found more`)
  }
}
/**
 * Returns the index of the first occurrence of `char` in `content` that lies
 * outside a double-quoted section, or -1 when there is none.
 *
 * Inside quotes a backslash escapes the following character, so an escaped
 * quote does not end the quoted section. This mirrors the quote handling of
 * `findUnquotedChar` in `string-utils`.
 */
function indexOfUnquoted(content: string, char: string): number {
  let inQuotes = false

  for (let i = 0; i < content.length; i++) {
    const current = content[i]

    if (inQuotes && current === '\\' && i + 1 < content.length) {
      // Step onto the escaped character; the loop increment then moves past it
      i++
      continue
    }

    if (current === '"') {
      inQuotes = !inQuotes
      continue
    }

    if (!inQuotes && current === char) {
      return i
    }
  }

  return -1
}

/**
 * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
 *
 * The decision compares the first colon and the first delimiter that appear
 * outside double quotes:
 * - no unquoted colon: data row
 * - unquoted delimiter before the first unquoted colon: data row
 * - otherwise: key-value pair
 *
 * Characters inside quoted tokens are ignored, so a quoted cell such as
 * `"a:b"` or a quoted key such as `"a,b"` does not flip the classification.
 *
 * @param content The line content
 * @param delimiter The delimiter used in the table
 * @returns true if the line is a data row, false if it's a key-value pair
 */
export function isDataRow(content: string, delimiter: Delimiter): boolean {
  const colonPos = indexOfUnquoted(content, COLON)

  // No unquoted colon = definitely a data row
  if (colonPos === -1) {
    return true
  }

  // A delimiter ahead of the colon marks a data row; a colon first (or no
  // delimiter at all) marks a key-value pair
  const delimiterPos = indexOfUnquoted(content, delimiter)
  return delimiterPos !== -1 && delimiterPos < colonPos
}
/**
 * Rejects a tabular array that is followed by one more data row at the same depth.
 *
 * Only the single upcoming line is examined. It counts as an extra row when it
 * sits at `rowDepth`, does not begin with the list item prefix, and `isDataRow`
 * classifies it as a row for the header's delimiter. Nothing is checked when
 * the cursor is already at the end.
 *
 * @param cursor The line cursor
 * @param rowDepth The expected depth of rows
 * @param header The array header info containing length and delimiter
 * @throws RangeError if extra rows are found
 */
export function validateNoExtraTabularRows(
  cursor: LineCursor,
  rowDepth: Depth,
  header: ArrayHeaderInfo,
): void {
  if (cursor.atEnd()) {
    return
  }

  const upcoming = cursor.peek()

  // A missing line or one at a different depth cannot be a row of this table
  if (!upcoming || upcoming.depth !== rowDepth) {
    return
  }

  // A list item is not a table row
  if (upcoming.content.startsWith(LIST_ITEM_PREFIX)) {
    return
  }

  // A key-value line at the same depth is allowed after the table
  if (!isDataRow(upcoming.content, header.delimiter)) {
    return
  }

  throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
}

View File

@@ -6,10 +6,10 @@ import type {
ResolvedEncodeOptions, ResolvedEncodeOptions,
} from './types' } from './types'
import { DEFAULT_DELIMITER } from './constants' import { DEFAULT_DELIMITER } from './constants'
import { decodeValueFromLines } from './decoders' import { decodeValueFromLines } from './decode/decoders'
import { encodeValue } from './encoders' import { encodeValue } from './encoders'
import { normalizeValue } from './normalize' import { normalizeValue } from './normalize'
import { LineCursor, toParsedLines } from './scanner' import { LineCursor, toParsedLines } from './decode/scanner'
export { DEFAULT_DELIMITER, DELIMITERS } from './constants' export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
export type { export type {