refactor: decoding source files

This commit is contained in:
Johann Schopplich
2025-10-29 10:12:13 +01:00
parent c4f00bd69f
commit 8d238f8eeb
6 changed files with 268 additions and 124 deletions

View File

@@ -1,6 +1,6 @@
import type { LineCursor } from './scanner'
import type {
ArrayHeaderInfo,
Delimiter,
Depth,
JsonArray,
JsonObject,
@@ -8,12 +8,13 @@ import type {
JsonValue,
ParsedLine,
ResolvedDecodeOptions,
} from './types'
} from '../types'
import type { LineCursor } from './scanner'
import {
COLON,
DEFAULT_DELIMITER,
LIST_ITEM_PREFIX,
} from './constants'
} from '../constants'
import {
isArrayHeaderAfterHyphen,
isObjectFirstFieldAfterHyphen,
@@ -23,6 +24,12 @@ import {
parseKeyToken,
parsePrimitiveToken,
} from './parser'
import { findClosingQuote } from './string-utils'
import {
assertExpectedCount,
validateNoExtraListItems,
validateNoExtraTabularRows,
} from './validation'
// #region Entry decoding
@@ -33,7 +40,7 @@ export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecode
}
// Check for root array
if (isRootArrayHeaderLine(first)) {
if (isArrayHeaderAfterHyphen(first.content)) {
const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER)
if (headerInfo) {
cursor.advance() // Move past the header line
@@ -50,29 +57,18 @@ export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecode
return decodeObject(cursor, 0, options)
}
function isRootArrayHeaderLine(line: ParsedLine): boolean {
return isArrayHeaderAfterHyphen(line.content)
}
function isKeyValueLine(line: ParsedLine): boolean {
const content = line.content
// Look for unquoted colon or quoted key followed by colon
if (content.startsWith('"')) {
// Quoted key
let i = 1
while (i < content.length) {
if (content[i] === '\\' && i + 1 < content.length) {
i += 2
continue
}
if (content[i] === '"') {
// Found end of quoted key, check for colon
return content[i + 1] === COLON
}
i++
}
// Quoted key - find the closing quote
const closingQuoteIndex = findClosingQuote(content, 0)
if (closingQuoteIndex === -1) {
return false
}
// Check if there's a colon after the quoted key
return closingQuoteIndex + 1 < content.length && content[closingQuoteIndex + 1] === COLON
}
else {
// Unquoted key - look for first colon not inside quotes
return content.includes(COLON)
@@ -227,11 +223,8 @@ function decodeListArray(
assertExpectedCount(items.length, header.length, 'list array items', options)
// In strict mode, check for extra items
if (options.strict && !cursor.atEnd()) {
const nextLine = cursor.peek()
if (nextLine && nextLine.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
throw new RangeError(`Expected ${header.length} list array items, but found more`)
}
if (options.strict) {
validateNoExtraListItems(cursor, itemDepth, header.length)
}
return items
@@ -274,30 +267,8 @@ function decodeTabularArray(
assertExpectedCount(objects.length, header.length, 'tabular rows', options)
// In strict mode, check for extra rows
if (options.strict && !cursor.atEnd()) {
const nextLine = cursor.peek()
if (nextLine && nextLine.depth === rowDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
// A key-value pair has a colon (and if it has delimiter, colon comes first)
// A data row either has no colon, or has delimiter before colon
const hasColon = nextLine.content.includes(COLON)
const hasDelimiter = nextLine.content.includes(header.delimiter)
if (!hasColon) {
// No colon = data row (for single-field tables)
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
}
else if (hasDelimiter) {
// Has both colon and delimiter - check which comes first
const colonPos = nextLine.content.indexOf(COLON)
const delimiterPos = nextLine.content.indexOf(header.delimiter)
if (delimiterPos < colonPos) {
// Delimiter before colon = data row
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
}
// Colon before delimiter = key-value pair, OK
}
// Has colon but no delimiter = key-value pair, OK
}
if (options.strict) {
validateNoExtraTabularRows(cursor, rowDepth, header)
}
return objects
@@ -310,7 +281,7 @@ function decodeTabularArray(
function decodeListItem(
cursor: LineCursor,
baseDepth: Depth,
activeDelimiter: string,
activeDelimiter: Delimiter,
options: ResolvedDecodeOptions,
): JsonValue {
const line = cursor.next()
@@ -322,7 +293,7 @@ function decodeListItem(
// Check for array header after hyphen
if (isArrayHeaderAfterHyphen(afterHyphen)) {
const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter as any)
const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter)
if (arrayHeader) {
return decodeArrayFromHeader(arrayHeader.header, arrayHeader.inlineValues, cursor, baseDepth, options)
}
@@ -344,7 +315,7 @@ function decodeObjectFromListItem(
options: ResolvedDecodeOptions,
): JsonObject {
const afterHyphen = firstLine.content.slice(LIST_ITEM_PREFIX.length)
const { key, value, followDepth } = decodeFirstFieldOnHyphen(afterHyphen, cursor, baseDepth, options)
const { key, value, followDepth } = decodeKeyValue(afterHyphen, cursor, baseDepth, options)
const obj: JsonObject = { [key]: value }
@@ -367,23 +338,4 @@ function decodeObjectFromListItem(
return obj
}
function decodeFirstFieldOnHyphen(
rest: string,
cursor: LineCursor,
baseDepth: Depth,
options: ResolvedDecodeOptions,
): { key: string, value: JsonValue, followDepth: Depth } {
return decodeKeyValue(rest, cursor, baseDepth, options)
}
// #endregion
// #region Validation
function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void {
if (options.strict && actual !== expected) {
throw new RangeError(`Expected ${expected} ${what}, but got ${actual}`)
}
}
// #endregion

View File

@@ -2,7 +2,7 @@ import type {
ArrayHeaderInfo,
Delimiter,
JsonPrimitive,
} from './types'
} from '../types'
import {
BACKSLASH,
CARRIAGE_RETURN,
@@ -20,7 +20,8 @@ import {
PIPE,
TAB,
TRUE_LITERAL,
} from './constants'
} from '../constants'
import { findClosingQuote, hasUnquotedChar } from './string-utils'
// #region Array header parsing
@@ -246,26 +247,19 @@ export function parseStringLiteral(token: string): string {
if (trimmed.startsWith(DOUBLE_QUOTE)) {
// Find the closing quote, accounting for escaped quotes
let i = 1
while (i < trimmed.length) {
if (trimmed[i] === BACKSLASH && i + 1 < trimmed.length) {
// Skip escaped character
i += 2
continue
}
if (trimmed[i] === DOUBLE_QUOTE) {
// Found closing quote
if (i !== trimmed.length - 1) {
throw new SyntaxError('Unexpected characters after closing quote')
}
const content = trimmed.slice(1, i)
return unescapeString(content)
}
i++
const closingQuoteIndex = findClosingQuote(trimmed, 0)
if (closingQuoteIndex === -1) {
// No closing quote was found
throw new SyntaxError('Unterminated string: missing closing quote')
}
// If we get here, no closing quote was found
throw new SyntaxError('Unterminated string: missing closing quote')
if (closingQuoteIndex !== trimmed.length - 1) {
throw new SyntaxError('Unexpected characters after closing quote')
}
const content = trimmed.slice(1, closingQuoteIndex)
return unescapeString(content)
}
return trimmed
@@ -338,20 +332,17 @@ export function parseUnquotedKey(content: string, start: number): { key: string,
}
export function parseQuotedKey(content: string, start: number): { key: string, end: number } {
let i = start + 1 // Skip opening quote
let keyContent = ''
// Find the closing quote, accounting for escaped quotes
const closingQuoteIndex = findClosingQuote(content, start)
while (i < content.length) {
if (content[i] === BACKSLASH && i + 1 < content.length) {
keyContent += content[i]! + content[i + 1]
i += 2
continue
if (closingQuoteIndex === -1) {
throw new SyntaxError('Unterminated quoted key')
}
if (content[i] === DOUBLE_QUOTE) {
// Found closing quote
// Extract and unescape the key content
const keyContent = content.slice(start + 1, closingQuoteIndex)
const key = unescapeString(keyContent)
let end = i + 1
let end = closingQuoteIndex + 1
// Validate and skip colon after quoted key
if (end >= content.length || content[end] !== COLON) {
@@ -360,13 +351,6 @@ export function parseQuotedKey(content: string, start: number): { key: string, e
end++
return { key, end }
}
keyContent += content[i]
i++
}
throw new SyntaxError('Unterminated quoted key')
}
export function parseKeyToken(content: string, start: number): { key: string, end: number } {
@@ -383,11 +367,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
// #region Array content detection helpers
export function isArrayHeaderAfterHyphen(content: string): boolean {
return content.trim().startsWith(OPEN_BRACKET) && content.includes(COLON)
return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON)
}
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
return content.includes(COLON)
return hasUnquotedChar(content, COLON)
}
// #endregion

View File

@@ -1,5 +1,5 @@
import type { Depth, ParsedLine } from './types'
import { SPACE } from './constants'
import type { Depth, ParsedLine } from '../types'
import { SPACE } from '../constants'
export class LineCursor {
private lines: ParsedLine[]
@@ -33,6 +33,21 @@ export class LineCursor {
get length(): number {
return this.lines.length
}
/**
 * Returns the line reported by `peek()` when it sits exactly at the requested depth.
 *
 * @param targetDepth The depth the upcoming line must have
 * @returns The upcoming line, or undefined when there is none or its depth differs
 */
peekAtDepth(targetDepth: Depth): ParsedLine | undefined {
  const upcoming = this.peek()
  // Shallower and deeper lines are both rejected; only an exact depth match is returned
  return upcoming?.depth === targetDepth ? upcoming : undefined
}
/**
 * Reports whether `peekAtDepth` yields a line for the requested depth.
 *
 * @param targetDepth The depth to check for
 * @returns true when `peekAtDepth(targetDepth)` returns a line, false otherwise
 */
hasMoreAtDepth(targetDepth: Depth): boolean {
  const match = this.peekAtDepth(targetDepth)
  return match !== undefined
}
}
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {

View File

@@ -0,0 +1,96 @@
import { BACKSLASH, DOUBLE_QUOTE } from '../constants'
/**
 * Locates the double quote that terminates a quoted section.
 *
 * Scanning begins right after `start`. A backslash followed by another character
 * is treated as an escape pair and both characters are skipped, so an escaped
 * quote never counts as the terminator.
 *
 * @param content The string to search in
 * @param start The index of the opening quote
 * @returns The index of the closing quote, or -1 if not found
 */
export function findClosingQuote(content: string, start: number): number {
  for (let pos = start + 1; pos < content.length; pos++) {
    const current = content[pos]

    if (current === BACKSLASH && pos + 1 < content.length) {
      // Step over the escaped character; the loop increment handles the backslash itself
      pos++
      continue
    }

    if (current === DOUBLE_QUOTE) {
      return pos
    }
  }

  return -1 // Not found
}
/**
 * Tells whether a character occurs anywhere outside of quoted sections.
 *
 * Delegates the scan to `findUnquotedChar` and only reports whether it found a position.
 *
 * @param content The string to check
 * @param char The character to look for
 * @returns true if the character exists outside quotes, false otherwise
 */
export function hasUnquotedChar(content: string, char: string): boolean {
  const position = findUnquotedChar(content, char)
  return position !== -1
}
/**
 * Returns the position of the first occurrence of a character that lies outside
 * of double-quoted sections.
 *
 * Every double quote toggles the "inside quotes" state. While inside quotes, a
 * backslash followed by another character is skipped as an escape pair, so an
 * escaped quote does not end the section. Outside quotes a backslash has no
 * special meaning.
 *
 * @param content The string to search in
 * @param char The character to look for
 * @param start Optional starting index (defaults to 0)
 * @returns The index of the character, or -1 if not found outside quotes
 */
export function findUnquotedChar(content: string, char: string, start = 0): number {
  let insideQuotes = false

  for (let pos = start; pos < content.length; pos++) {
    const current = content[pos]

    if (insideQuotes && current === BACKSLASH && pos + 1 < content.length) {
      // Escape pair: skip the escaped character (the loop increment skips the backslash)
      pos++
      continue
    }

    if (current === DOUBLE_QUOTE) {
      insideQuotes = !insideQuotes
    }
    else if (!insideQuotes && current === char) {
      return pos
    }
  }

  return -1
}
/**
 * Tells whether a string, ignoring surrounding whitespace, begins and ends with a double quote.
 *
 * A lone quote character does not qualify: at least two characters are required
 * so the opening and closing quote are distinct.
 *
 * @param content The string to check
 * @returns true if the string is quoted, false otherwise
 */
export function isQuotedString(content: string): boolean {
  const text = content.trim()

  if (!text.startsWith(DOUBLE_QUOTE)) {
    return false
  }
  if (!text.endsWith(DOUBLE_QUOTE)) {
    return false
  }

  return text.length >= 2
}
/**
 * Advances past consecutive whitespace characters.
 *
 * Whitespace is whatever the `\s` regular-expression class matches.
 *
 * @param content The string to process
 * @param start The starting index
 * @returns The index of the first non-whitespace character, or content.length if all whitespace
 */
export function skipWhitespace(content: string, start: number): number {
  const whitespace = /\s/
  let index = start

  for (; index < content.length; index++) {
    if (!whitespace.test(content[index]!)) {
      break
    }
  }

  return index
}

97
src/decode/validation.ts Normal file
View File

@@ -0,0 +1,97 @@
import type { ArrayHeaderInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner'
import { COLON, LIST_ITEM_PREFIX } from '../constants'
/**
 * Enforces a declared element count when strict decoding is enabled.
 *
 * Does nothing unless `options.strict` is truthy.
 *
 * @param actual The actual count
 * @param expected The expected count
 * @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows')
 * @param options Decode options
 * @throws RangeError if counts don't match in strict mode
 */
export function assertExpectedCount(
  actual: number,
  expected: number,
  itemType: string,
  options: ResolvedDecodeOptions,
): void {
  if (!options.strict) {
    return
  }
  if (actual === expected) {
    return
  }

  throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`)
}
/**
 * Rejects a list array that continues past its declared length.
 *
 * Looks at the line returned by `cursor.peek()` without consuming it. That line counts
 * as an extra item when it is at `itemDepth` and starts with the list item prefix.
 *
 * @param cursor The line cursor
 * @param itemDepth The expected depth of items
 * @param expectedCount The expected number of items
 * @throws RangeError if extra items are found
 */
export function validateNoExtraListItems(
  cursor: LineCursor,
  itemDepth: Depth,
  expectedCount: number,
): void {
  if (cursor.atEnd()) {
    return
  }

  const candidate = cursor.peek()
  if (!candidate || candidate.depth !== itemDepth) {
    return
  }
  if (!candidate.content.startsWith(LIST_ITEM_PREFIX)) {
    return
  }

  throw new RangeError(`Expected ${expectedCount} list array items, but found more`)
}
/**
 * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
 *
 * Only colons and delimiters outside of double-quoted tokens take part in the decision.
 * A quoted cell such as `"a:b"` therefore does not turn a row into a key-value pair, and a
 * quoted key such as `"a,b": 1` is not mistaken for a row.
 *
 * @param content The line content
 * @param delimiter The delimiter used in the table
 * @returns true if the line is a data row, false if it's a key-value pair
 */
export function isDataRow(content: string, delimiter: Delimiter): boolean {
  const colonPos = firstUnquotedIndex(content, COLON)

  // No unquoted colon = definitely a data row
  if (colonPos === -1) {
    return true
  }

  const delimiterPos = firstUnquotedIndex(content, delimiter)

  // Unquoted delimiter before the colon = data row
  // Colon before delimiter, or no delimiter at all = key-value pair
  return delimiterPos !== -1 && delimiterPos < colonPos
}

/**
 * Finds the first occurrence of `token` that is not inside a double-quoted section.
 *
 * Inside quotes, a backslash followed by another character is skipped as an escape
 * pair so that an escaped quote does not close the section.
 *
 * @param content The string to search in
 * @param token The text to look for
 * @returns The index of the first unquoted occurrence, or -1 if there is none
 */
function firstUnquotedIndex(content: string, token: string): number {
  let inQuotes = false

  for (let i = 0; i < content.length; i++) {
    const current = content[i]

    if (inQuotes && current === '\\' && i + 1 < content.length) {
      // Skip the escaped character; the loop increment skips the backslash
      i++
      continue
    }

    if (current === '"') {
      inQuotes = !inQuotes
      continue
    }

    if (!inQuotes && content.startsWith(token, i)) {
      return i
    }
  }

  return -1
}
/**
 * Rejects a tabular array that continues past its declared number of rows.
 *
 * Looks at the line returned by `cursor.peek()` without consuming it. That line counts
 * as an extra row when it is at `rowDepth`, does not start with the list item prefix,
 * and `isDataRow` classifies it as a row for the header's delimiter.
 *
 * @param cursor The line cursor
 * @param rowDepth The expected depth of rows
 * @param header The array header info containing length and delimiter
 * @throws RangeError if extra rows are found
 */
export function validateNoExtraTabularRows(
  cursor: LineCursor,
  rowDepth: Depth,
  header: ArrayHeaderInfo,
): void {
  if (cursor.atEnd()) {
    return
  }

  const candidate = cursor.peek()
  if (!candidate || candidate.depth !== rowDepth) {
    return
  }
  if (candidate.content.startsWith(LIST_ITEM_PREFIX)) {
    return
  }
  if (!isDataRow(candidate.content, header.delimiter)) {
    return
  }

  throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
}

View File

@@ -6,10 +6,10 @@ import type {
ResolvedEncodeOptions,
} from './types'
import { DEFAULT_DELIMITER } from './constants'
import { decodeValueFromLines } from './decoders'
import { decodeValueFromLines } from './decode/decoders'
import { encodeValue } from './encoders'
import { normalizeValue } from './normalize'
import { LineCursor, toParsedLines } from './scanner'
import { LineCursor, toParsedLines } from './decode/scanner'
export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
export type {