mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
refactor: decoding source files
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import type { LineCursor } from './scanner'
|
||||
import type {
|
||||
ArrayHeaderInfo,
|
||||
Delimiter,
|
||||
Depth,
|
||||
JsonArray,
|
||||
JsonObject,
|
||||
@@ -8,12 +8,13 @@ import type {
|
||||
JsonValue,
|
||||
ParsedLine,
|
||||
ResolvedDecodeOptions,
|
||||
} from './types'
|
||||
} from '../types'
|
||||
import type { LineCursor } from './scanner'
|
||||
import {
|
||||
COLON,
|
||||
DEFAULT_DELIMITER,
|
||||
LIST_ITEM_PREFIX,
|
||||
} from './constants'
|
||||
} from '../constants'
|
||||
import {
|
||||
isArrayHeaderAfterHyphen,
|
||||
isObjectFirstFieldAfterHyphen,
|
||||
@@ -23,6 +24,12 @@ import {
|
||||
parseKeyToken,
|
||||
parsePrimitiveToken,
|
||||
} from './parser'
|
||||
import { findClosingQuote } from './string-utils'
|
||||
import {
|
||||
assertExpectedCount,
|
||||
validateNoExtraListItems,
|
||||
validateNoExtraTabularRows,
|
||||
} from './validation'
|
||||
|
||||
// #region Entry decoding
|
||||
|
||||
@@ -33,7 +40,7 @@ export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecode
|
||||
}
|
||||
|
||||
// Check for root array
|
||||
if (isRootArrayHeaderLine(first)) {
|
||||
if (isArrayHeaderAfterHyphen(first.content)) {
|
||||
const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER)
|
||||
if (headerInfo) {
|
||||
cursor.advance() // Move past the header line
|
||||
@@ -50,29 +57,18 @@ export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecode
|
||||
return decodeObject(cursor, 0, options)
|
||||
}
|
||||
|
||||
function isRootArrayHeaderLine(line: ParsedLine): boolean {
|
||||
return isArrayHeaderAfterHyphen(line.content)
|
||||
}
|
||||
|
||||
function isKeyValueLine(line: ParsedLine): boolean {
|
||||
const content = line.content
|
||||
// Look for unquoted colon or quoted key followed by colon
|
||||
if (content.startsWith('"')) {
|
||||
// Quoted key
|
||||
let i = 1
|
||||
while (i < content.length) {
|
||||
if (content[i] === '\\' && i + 1 < content.length) {
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
if (content[i] === '"') {
|
||||
// Found end of quoted key, check for colon
|
||||
return content[i + 1] === COLON
|
||||
}
|
||||
i++
|
||||
}
|
||||
// Quoted key - find the closing quote
|
||||
const closingQuoteIndex = findClosingQuote(content, 0)
|
||||
if (closingQuoteIndex === -1) {
|
||||
return false
|
||||
}
|
||||
// Check if there's a colon after the quoted key
|
||||
return closingQuoteIndex + 1 < content.length && content[closingQuoteIndex + 1] === COLON
|
||||
}
|
||||
else {
|
||||
// Unquoted key - look for first colon not inside quotes
|
||||
return content.includes(COLON)
|
||||
@@ -227,11 +223,8 @@ function decodeListArray(
|
||||
assertExpectedCount(items.length, header.length, 'list array items', options)
|
||||
|
||||
// In strict mode, check for extra items
|
||||
if (options.strict && !cursor.atEnd()) {
|
||||
const nextLine = cursor.peek()
|
||||
if (nextLine && nextLine.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||
throw new RangeError(`Expected ${header.length} list array items, but found more`)
|
||||
}
|
||||
if (options.strict) {
|
||||
validateNoExtraListItems(cursor, itemDepth, header.length)
|
||||
}
|
||||
|
||||
return items
|
||||
@@ -274,30 +267,8 @@ function decodeTabularArray(
|
||||
assertExpectedCount(objects.length, header.length, 'tabular rows', options)
|
||||
|
||||
// In strict mode, check for extra rows
|
||||
if (options.strict && !cursor.atEnd()) {
|
||||
const nextLine = cursor.peek()
|
||||
if (nextLine && nextLine.depth === rowDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
|
||||
// A key-value pair has a colon (and if it has delimiter, colon comes first)
|
||||
// A data row either has no colon, or has delimiter before colon
|
||||
const hasColon = nextLine.content.includes(COLON)
|
||||
const hasDelimiter = nextLine.content.includes(header.delimiter)
|
||||
|
||||
if (!hasColon) {
|
||||
// No colon = data row (for single-field tables)
|
||||
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
|
||||
}
|
||||
else if (hasDelimiter) {
|
||||
// Has both colon and delimiter - check which comes first
|
||||
const colonPos = nextLine.content.indexOf(COLON)
|
||||
const delimiterPos = nextLine.content.indexOf(header.delimiter)
|
||||
if (delimiterPos < colonPos) {
|
||||
// Delimiter before colon = data row
|
||||
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
|
||||
}
|
||||
// Colon before delimiter = key-value pair, OK
|
||||
}
|
||||
// Has colon but no delimiter = key-value pair, OK
|
||||
}
|
||||
if (options.strict) {
|
||||
validateNoExtraTabularRows(cursor, rowDepth, header)
|
||||
}
|
||||
|
||||
return objects
|
||||
@@ -310,7 +281,7 @@ function decodeTabularArray(
|
||||
function decodeListItem(
|
||||
cursor: LineCursor,
|
||||
baseDepth: Depth,
|
||||
activeDelimiter: string,
|
||||
activeDelimiter: Delimiter,
|
||||
options: ResolvedDecodeOptions,
|
||||
): JsonValue {
|
||||
const line = cursor.next()
|
||||
@@ -322,7 +293,7 @@ function decodeListItem(
|
||||
|
||||
// Check for array header after hyphen
|
||||
if (isArrayHeaderAfterHyphen(afterHyphen)) {
|
||||
const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter as any)
|
||||
const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter)
|
||||
if (arrayHeader) {
|
||||
return decodeArrayFromHeader(arrayHeader.header, arrayHeader.inlineValues, cursor, baseDepth, options)
|
||||
}
|
||||
@@ -344,7 +315,7 @@ function decodeObjectFromListItem(
|
||||
options: ResolvedDecodeOptions,
|
||||
): JsonObject {
|
||||
const afterHyphen = firstLine.content.slice(LIST_ITEM_PREFIX.length)
|
||||
const { key, value, followDepth } = decodeFirstFieldOnHyphen(afterHyphen, cursor, baseDepth, options)
|
||||
const { key, value, followDepth } = decodeKeyValue(afterHyphen, cursor, baseDepth, options)
|
||||
|
||||
const obj: JsonObject = { [key]: value }
|
||||
|
||||
@@ -367,23 +338,4 @@ function decodeObjectFromListItem(
|
||||
return obj
|
||||
}
|
||||
|
||||
function decodeFirstFieldOnHyphen(
|
||||
rest: string,
|
||||
cursor: LineCursor,
|
||||
baseDepth: Depth,
|
||||
options: ResolvedDecodeOptions,
|
||||
): { key: string, value: JsonValue, followDepth: Depth } {
|
||||
return decodeKeyValue(rest, cursor, baseDepth, options)
|
||||
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Validation
|
||||
|
||||
function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void {
|
||||
if (options.strict && actual !== expected) {
|
||||
throw new RangeError(`Expected ${expected} ${what}, but got ${actual}`)
|
||||
}
|
||||
}
|
||||
|
||||
// #endregion
|
||||
@@ -2,7 +2,7 @@ import type {
|
||||
ArrayHeaderInfo,
|
||||
Delimiter,
|
||||
JsonPrimitive,
|
||||
} from './types'
|
||||
} from '../types'
|
||||
import {
|
||||
BACKSLASH,
|
||||
CARRIAGE_RETURN,
|
||||
@@ -20,7 +20,8 @@ import {
|
||||
PIPE,
|
||||
TAB,
|
||||
TRUE_LITERAL,
|
||||
} from './constants'
|
||||
} from '../constants'
|
||||
import { findClosingQuote, hasUnquotedChar } from './string-utils'
|
||||
|
||||
// #region Array header parsing
|
||||
|
||||
@@ -246,26 +247,19 @@ export function parseStringLiteral(token: string): string {
|
||||
|
||||
if (trimmed.startsWith(DOUBLE_QUOTE)) {
|
||||
// Find the closing quote, accounting for escaped quotes
|
||||
let i = 1
|
||||
while (i < trimmed.length) {
|
||||
if (trimmed[i] === BACKSLASH && i + 1 < trimmed.length) {
|
||||
// Skip escaped character
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
if (trimmed[i] === DOUBLE_QUOTE) {
|
||||
// Found closing quote
|
||||
if (i !== trimmed.length - 1) {
|
||||
throw new SyntaxError('Unexpected characters after closing quote')
|
||||
}
|
||||
const content = trimmed.slice(1, i)
|
||||
return unescapeString(content)
|
||||
}
|
||||
i++
|
||||
const closingQuoteIndex = findClosingQuote(trimmed, 0)
|
||||
|
||||
if (closingQuoteIndex === -1) {
|
||||
// No closing quote was found
|
||||
throw new SyntaxError('Unterminated string: missing closing quote')
|
||||
}
|
||||
|
||||
// If we get here, no closing quote was found
|
||||
throw new SyntaxError('Unterminated string: missing closing quote')
|
||||
if (closingQuoteIndex !== trimmed.length - 1) {
|
||||
throw new SyntaxError('Unexpected characters after closing quote')
|
||||
}
|
||||
|
||||
const content = trimmed.slice(1, closingQuoteIndex)
|
||||
return unescapeString(content)
|
||||
}
|
||||
|
||||
return trimmed
|
||||
@@ -338,20 +332,17 @@ export function parseUnquotedKey(content: string, start: number): { key: string,
|
||||
}
|
||||
|
||||
export function parseQuotedKey(content: string, start: number): { key: string, end: number } {
|
||||
let i = start + 1 // Skip opening quote
|
||||
let keyContent = ''
|
||||
// Find the closing quote, accounting for escaped quotes
|
||||
const closingQuoteIndex = findClosingQuote(content, start)
|
||||
|
||||
while (i < content.length) {
|
||||
if (content[i] === BACKSLASH && i + 1 < content.length) {
|
||||
keyContent += content[i]! + content[i + 1]
|
||||
i += 2
|
||||
continue
|
||||
if (closingQuoteIndex === -1) {
|
||||
throw new SyntaxError('Unterminated quoted key')
|
||||
}
|
||||
|
||||
if (content[i] === DOUBLE_QUOTE) {
|
||||
// Found closing quote
|
||||
// Extract and unescape the key content
|
||||
const keyContent = content.slice(start + 1, closingQuoteIndex)
|
||||
const key = unescapeString(keyContent)
|
||||
let end = i + 1
|
||||
let end = closingQuoteIndex + 1
|
||||
|
||||
// Validate and skip colon after quoted key
|
||||
if (end >= content.length || content[end] !== COLON) {
|
||||
@@ -362,13 +353,6 @@ export function parseQuotedKey(content: string, start: number): { key: string, e
|
||||
return { key, end }
|
||||
}
|
||||
|
||||
keyContent += content[i]
|
||||
i++
|
||||
}
|
||||
|
||||
throw new SyntaxError('Unterminated quoted key')
|
||||
}
|
||||
|
||||
export function parseKeyToken(content: string, start: number): { key: string, end: number } {
|
||||
if (content[start] === DOUBLE_QUOTE) {
|
||||
return parseQuotedKey(content, start)
|
||||
@@ -383,11 +367,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
|
||||
// #region Array content detection helpers
|
||||
|
||||
/**
 * Checks whether the given content looks like an array header.
 *
 * The content qualifies when, after trimming, it starts with `OPEN_BRACKET`
 * and it contains a `COLON` outside of any double-quoted section. A colon that
 * only appears inside a quoted string does not count.
 *
 * @param content The text to inspect (e.g. the part of a list item after the hyphen)
 * @returns true if the content has the shape of an array header
 */
export function isArrayHeaderAfterHyphen(content: string): boolean {
  return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON)
}
|
||||
|
||||
/**
 * Checks whether the given content looks like the first `key: value` field of an object.
 *
 * Only a `COLON` outside of double-quoted sections counts, so a quoted string
 * value that merely contains a colon is not mistaken for a field.
 *
 * @param content The text to inspect (e.g. the part of a list item after the hyphen)
 * @returns true if the content contains an unquoted colon
 */
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
  return hasUnquotedChar(content, COLON)
}
|
||||
|
||||
// #endregion
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { Depth, ParsedLine } from './types'
|
||||
import { SPACE } from './constants'
|
||||
import type { Depth, ParsedLine } from '../types'
|
||||
import { SPACE } from '../constants'
|
||||
|
||||
export class LineCursor {
|
||||
private lines: ParsedLine[]
|
||||
@@ -33,6 +33,21 @@ export class LineCursor {
|
||||
get length(): number {
|
||||
return this.lines.length
|
||||
}
|
||||
|
||||
/**
 * Returns the line yielded by `peek()` only when it sits exactly at `targetDepth`.
 *
 * @param targetDepth The depth the upcoming line must have
 * @returns The upcoming line, or `undefined` when `peek()` yields nothing or
 *   the line is at any other depth (shallower or deeper)
 */
peekAtDepth(targetDepth: Depth): ParsedLine | undefined {
  const line = this.peek()
  // Shallower and deeper lines are both rejected, so one exact-match check suffices.
  return line && line.depth === targetDepth ? line : undefined
}
|
||||
|
||||
/**
 * Reports whether `peekAtDepth(targetDepth)` currently yields a line.
 *
 * @param targetDepth The depth to check the upcoming line against
 * @returns true if the upcoming line exists and is exactly at `targetDepth`
 */
hasMoreAtDepth(targetDepth: Depth): boolean {
  return this.peekAtDepth(targetDepth) !== undefined
}
|
||||
}
|
||||
|
||||
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
|
||||
96
src/decode/string-utils.ts
Normal file
96
src/decode/string-utils.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import { BACKSLASH, DOUBLE_QUOTE } from '../constants'
|
||||
|
||||
/**
 * Finds the index of the closing double quote in a string, accounting for escape sequences.
 *
 * Scanning begins at `start + 1`; the character at `start` itself is never
 * inspected and is expected to be the opening quote. A backslash that is
 * followed by another character consumes that character, so an escaped quote
 * (`\"`) does not terminate the string.
 *
 * @param content The string to search in
 * @param start The index of the opening quote
 * @returns The index of the closing quote, or -1 if not found
 */
export function findClosingQuote(content: string, start: number): number {
  for (let i = start + 1; i < content.length; i++) {
    const ch = content[i]

    if (ch === BACKSLASH && i + 1 < content.length) {
      // Step over the escaped character; the loop increment skips the backslash itself.
      i++
      continue
    }

    if (ch === DOUBLE_QUOTE) {
      return i
    }
  }

  return -1 // Not found
}
|
||||
|
||||
/**
 * Checks if a string contains a specific character outside of quoted sections.
 *
 * Thin boolean wrapper around `findUnquotedChar`, searching from index 0.
 *
 * @param content The string to check
 * @param char The character to look for
 * @returns true if the character exists outside quotes, false otherwise
 */
export function hasUnquotedChar(content: string, char: string): boolean {
  return findUnquotedChar(content, char) !== -1
}
|
||||
|
||||
/**
 * Finds the index of a specific character outside of quoted sections.
 *
 * The scan tracks whether it is inside a double-quoted section, starting in
 * the "outside quotes" state, so `start` must not point into the middle of a
 * quoted section. Backslash escapes are only honoured inside quotes, where the
 * escaped character (including `\"`) is skipped.
 *
 * @param content The string to search in
 * @param char The character to look for (compared against single characters of `content`)
 * @param start Optional starting index (defaults to 0)
 * @returns The index of the character, or -1 if not found outside quotes
 */
export function findUnquotedChar(content: string, char: string, start = 0): number {
  let inQuotes = false

  for (let i = start; i < content.length; i++) {
    const ch = content[i]

    if (inQuotes && ch === BACKSLASH && i + 1 < content.length) {
      // Step over the escaped character; the loop increment skips the backslash itself.
      i++
      continue
    }

    if (ch === DOUBLE_QUOTE) {
      // A quote toggles the state and is never itself reported as a match.
      inQuotes = !inQuotes
      continue
    }

    if (!inQuotes && ch === char) {
      return i
    }
  }

  return -1
}
|
||||
|
||||
/**
 * Checks if a string starts and ends with double quotes.
 *
 * Surrounding whitespace is ignored. A lone `"` is not considered quoted: the
 * trimmed text must be at least two characters long so that the opening and
 * closing quotes are distinct characters. The check is purely positional; it
 * does not verify that the final quote is unescaped.
 *
 * @param content The string to check
 * @returns true if the string is quoted, false otherwise
 */
export function isQuotedString(content: string): boolean {
  const trimmed = content.trim()

  if (trimmed.length < 2) {
    return false
  }

  return trimmed.startsWith(DOUBLE_QUOTE) && trimmed.endsWith(DOUBLE_QUOTE)
}
|
||||
|
||||
// Hoisted so the pattern is built once rather than on every loop iteration.
// No `g`/`y` flag, so `test()` carries no state between calls.
const WHITESPACE_CHAR = /\s/

/**
 * Skips whitespace characters starting from a given index.
 *
 * @param content The string to process
 * @param start The starting index
 * @returns The index of the first non-whitespace character, or content.length if all whitespace
 */
export function skipWhitespace(content: string, start: number): number {
  let i = start
  while (i < content.length && WHITESPACE_CHAR.test(content.charAt(i))) {
    i++
  }
  return i
}
|
||||
97
src/decode/validation.ts
Normal file
97
src/decode/validation.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import type { ArrayHeaderInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner'
import { COLON, LIST_ITEM_PREFIX } from '../constants'
import { findUnquotedChar } from './string-utils'
|
||||
|
||||
/**
 * Asserts that the actual count matches the expected count in strict mode.
 *
 * When `options.strict` is falsy the function never throws, regardless of the counts.
 *
 * @param actual The actual count
 * @param expected The expected count
 * @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows')
 * @param options Decode options
 * @throws RangeError if counts don't match in strict mode
 */
export function assertExpectedCount(
  actual: number,
  expected: number,
  itemType: string,
  options: ResolvedDecodeOptions,
): void {
  if (!options.strict || actual === expected) {
    return
  }

  throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`)
}
|
||||
|
||||
/**
 * Validates that there are no extra list items beyond the expected count.
 *
 * Only the single upcoming line is inspected (via `cursor.peek()`); the cursor
 * is not advanced by this function. The line counts as an extra item when it
 * is at `itemDepth` and its content starts with `LIST_ITEM_PREFIX`.
 *
 * @param cursor The line cursor
 * @param itemDepth The expected depth of items
 * @param expectedCount The expected number of items (used only in the error message)
 * @throws RangeError if extra items are found
 */
export function validateNoExtraListItems(
  cursor: LineCursor,
  itemDepth: Depth,
  expectedCount: number,
): void {
  if (cursor.atEnd())
    return

  const nextLine = cursor.peek()
  if (!nextLine || nextLine.depth !== itemDepth)
    return

  if (nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
    throw new RangeError(`Expected ${expectedCount} list array items, but found more`)
  }
}
|
||||
|
||||
/**
 * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
 *
 * The decision is based on the first colon and the first delimiter that
 * appear OUTSIDE double-quoted sections:
 * - no unquoted colon: data row
 * - unquoted delimiter before the first unquoted colon: data row
 * - otherwise (colon first, or colon without any unquoted delimiter): key-value pair
 *
 * Quote-awareness matters because a quoted cell may contain a colon
 * (`"a:b",c` is a row) and a quoted key may contain the delimiter
 * (`"a,b": 1` is a key-value pair).
 *
 * @param content The line content
 * @param delimiter The delimiter used in the table
 * @returns true if the line is a data row, false if it's a key-value pair
 */
export function isDataRow(content: string, delimiter: Delimiter): boolean {
  const colonPos = findUnquotedChar(content, COLON)

  // No unquoted colon = definitely a data row
  if (colonPos === -1) {
    return true
  }

  // A data row only if an unquoted delimiter precedes the first unquoted colon
  const delimiterPos = findUnquotedChar(content, delimiter)
  return delimiterPos !== -1 && delimiterPos < colonPos
}
|
||||
|
||||
/**
 * Validates that there are no extra tabular rows beyond the expected count.
 *
 * Only the single upcoming line is inspected (via `cursor.peek()`); the cursor
 * is not advanced by this function. The line counts as an extra row when it is
 * at `rowDepth`, does not start with `LIST_ITEM_PREFIX`, and `isDataRow`
 * classifies it as a data row for `header.delimiter`.
 *
 * @param cursor The line cursor
 * @param rowDepth The expected depth of rows
 * @param header The array header info containing length and delimiter
 * @throws RangeError if extra rows are found
 */
export function validateNoExtraTabularRows(
  cursor: LineCursor,
  rowDepth: Depth,
  header: ArrayHeaderInfo,
): void {
  if (cursor.atEnd())
    return

  const nextLine = cursor.peek()
  if (!nextLine || nextLine.depth !== rowDepth)
    return

  // A list item at row depth is not a table row
  if (nextLine.content.startsWith(LIST_ITEM_PREFIX))
    return

  if (isDataRow(nextLine.content, header.delimiter)) {
    throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
  }
}
|
||||
@@ -6,10 +6,10 @@ import type {
|
||||
ResolvedEncodeOptions,
|
||||
} from './types'
|
||||
import { DEFAULT_DELIMITER } from './constants'
|
||||
import { decodeValueFromLines } from './decoders'
|
||||
import { decodeValueFromLines } from './decode/decoders'
|
||||
import { encodeValue } from './encoders'
|
||||
import { normalizeValue } from './normalize'
|
||||
import { LineCursor, toParsedLines } from './scanner'
|
||||
import { LineCursor, toParsedLines } from './decode/scanner'
|
||||
|
||||
export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
|
||||
export type {
|
||||
|
||||
Reference in New Issue
Block a user