mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
feat: decode method (#10)
This commit is contained in:
@@ -11,6 +11,7 @@ export const COMMA = ','
|
||||
export const COLON = ':'
|
||||
export const SPACE = ' '
|
||||
export const PIPE = '|'
|
||||
export const HASH = '#'
|
||||
|
||||
// #endregion
|
||||
|
||||
|
||||
419
src/decoders.ts
Normal file
419
src/decoders.ts
Normal file
@@ -0,0 +1,419 @@
|
||||
import type { LineCursor } from './scanner'
|
||||
import type {
|
||||
ArrayHeaderInfo,
|
||||
Depth,
|
||||
JsonArray,
|
||||
JsonObject,
|
||||
JsonPrimitive,
|
||||
JsonValue,
|
||||
ParsedLine,
|
||||
ResolvedDecodeOptions,
|
||||
} from './types'
|
||||
import {
|
||||
COLON,
|
||||
DEFAULT_DELIMITER,
|
||||
LIST_ITEM_PREFIX,
|
||||
} from './constants'
|
||||
import {
|
||||
isArrayHeaderAfterHyphen,
|
||||
isObjectFirstFieldAfterHyphen,
|
||||
parseArrayHeaderLine,
|
||||
parseKeyToken,
|
||||
parsePrimitiveToken,
|
||||
parseRowValuesToPrimitives,
|
||||
splitDelimitedValues,
|
||||
} from './parser'
|
||||
|
||||
// #region Entry decoding
|
||||
|
||||
/**
 * Decodes a whole document from the cursor's current position.
 *
 * The root form is chosen from the first line:
 * 1. a line that looks like `[N]...:` and parses as an array header -> root array,
 * 2. a single-line document that is not a `key: value` line -> root primitive,
 * 3. anything else -> root object at depth 0.
 *
 * @param cursor - Cursor over the parsed lines; consumed as decoding proceeds.
 * @param options - Resolved decode options, forwarded to the nested decoders.
 * @returns The decoded JSON value.
 * @throws Error when the cursor has no line to read.
 */
export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecodeOptions): JsonValue {
  const firstLine = cursor.peek()
  if (!firstLine) {
    throw new Error('No content to decode')
  }

  // A line that merely looks like a root header may still fail to parse;
  // in that case it falls through to the primitive/object handling below.
  const rootHeader = isRootArrayHeaderLine(firstLine)
    ? parseArrayHeaderLine(firstLine.content, DEFAULT_DELIMITER)
    : undefined
  if (rootHeader) {
    cursor.advance() // consume the header line before reading array content
    return decodeArrayFromHeader(rootHeader.header, firstLine, cursor, 0, options)
  }

  const isLonePrimitive = cursor.length === 1 && !isKeyValueLine(firstLine)
  return isLonePrimitive
    ? parsePrimitiveToken(firstLine.content.trim())
    : decodeObject(cursor, 0, options)
}
|
||||
|
||||
/**
 * Cheap pre-check for a root-level array header: the trimmed line must begin
 * with `[` and contain a colon somewhere. Full validation is left to the
 * array header parser.
 */
function isRootArrayHeaderLine(line: ParsedLine): boolean {
  const trimmed = line.content.trim()
  if (!trimmed.startsWith('[')) {
    return false
  }
  return trimmed.includes(COLON)
}
|
||||
|
||||
/**
 * Reports whether a line has the shape of a `key: value` entry.
 *
 * - Line starting with `"`: the closing quote of the key (escapes skipped)
 *   must be immediately followed by a colon.
 * - Any other line: it is enough that a colon appears anywhere in it.
 */
function isKeyValueLine(line: ParsedLine): boolean {
  const text = line.content

  if (!text.startsWith('"')) {
    return text.includes(COLON)
  }

  for (let pos = 1; pos < text.length; pos++) {
    const ch = text[pos]
    if (ch === '\\' && pos + 1 < text.length) {
      pos++ // step over the escaped character; the loop increment skips the backslash
      continue
    }
    if (ch === '"') {
      // End of the quoted key: it is a key only when a colon follows directly
      return text[pos + 1] === COLON
    }
  }

  // Opening quote was never closed
  return false
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Object decoding
|
||||
|
||||
/**
 * Reads consecutive key-value lines located exactly at `baseDepth` into an object.
 *
 * Reading stops at the end of input or at the first line whose depth differs
 * from `baseDepth` (shallower or deeper); that line is left unconsumed.
 *
 * @param cursor - Line cursor positioned at the first candidate field.
 * @param baseDepth - Depth at which this object's fields live.
 * @param options - Resolved decode options, forwarded to field decoding.
 * @returns The decoded object (empty when no field matched).
 */
function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  const result: JsonObject = {}

  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth !== baseDepth) {
      break
    }

    const [fieldKey, fieldValue] = decodeKeyValuePair(line, cursor, baseDepth, options)
    result[fieldKey] = fieldValue
  }

  return result
}
|
||||
|
||||
/**
 * Decodes one object field starting at `line` and consumes that line.
 *
 * Resolution order:
 * 1. the line parses as an array header with a non-empty key -> array value,
 * 2. text follows the key's colon -> inline primitive,
 * 3. nothing follows the colon -> nested object if the next line is deeper
 *    than `baseDepth`, otherwise an empty object.
 *
 * @param line - The line holding the field (the one the cursor currently points at).
 * @param cursor - Line cursor; advanced past `line` and any nested content.
 * @param baseDepth - Depth of the field's line.
 * @param options - Resolved decode options.
 * @returns A `[key, value]` tuple.
 */
function decodeKeyValuePair(
  line: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): [key: string, value: JsonValue] {
  cursor.advance()

  // Array headers take precedence over plain key parsing
  const parsedHeader = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER)
  if (parsedHeader && parsedHeader.header.key) {
    const arrayValue = decodeArrayFromHeader(parsedHeader.header, line, cursor, baseDepth, options)
    return [parsedHeader.header.key, arrayValue]
  }

  const { key, end } = parseKeyToken(line.content, 0)
  const inlineText = line.content.slice(end).trim()

  if (inlineText) {
    return [key, parsePrimitiveToken(inlineText)]
  }

  // Nothing after the colon: children (if any) live on deeper lines
  const upcoming = cursor.peek()
  return upcoming && upcoming.depth > baseDepth
    ? [key, expectNestedObject(cursor, baseDepth + 1, options)]
    : [key, {}]
}
|
||||
|
||||
/**
 * Decodes the object whose fields sit at `nestedDepth`.
 * Currently a plain delegation to the object decoder.
 */
function expectNestedObject(cursor: LineCursor, nestedDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  const nested = decodeObject(cursor, nestedDepth, options)
  return nested
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Array decoding
|
||||
|
||||
/**
 * Decodes the array introduced by an already-parsed header.
 *
 * `line.content` is parsed once more (with the default delimiter) solely to
 * recover the inline text after the header's colon; the `header` argument
 * remains the source of length, delimiter and fields.
 *
 * Every caller in this module consumes the header line before calling, so the
 * cursor is expected to sit on the first content line of a multi-line array.
 *
 * @param header - Parsed header describing the array.
 * @param line - Line the header came from (may be a synthetic line).
 * @param cursor - Line cursor positioned after the header line.
 * @param baseDepth - Depth of the header; rows/items are read at `baseDepth + 1`.
 * @param options - Resolved decode options.
 * @returns The decoded array.
 * @throws Error when `line.content` does not parse as an array header.
 */
function decodeArrayFromHeader(
  header: ArrayHeaderInfo,
  line: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonArray {
  const reparsed = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER)
  if (!reparsed) {
    throw new Error('Invalid array header')
  }

  // Values on the header line itself -> inline primitive array
  const { inlineValues } = reparsed
  if (inlineValues) {
    return decodeInlinePrimitiveArray(header, inlineValues, options)
  }

  // A non-empty field list selects the tabular form; otherwise it is a `- item` list
  const isTabular = (header.fields?.length ?? 0) > 0
  return isTabular
    ? decodeTabularArray(header, cursor, baseDepth, options)
    : decodeListArray(header, cursor, baseDepth, options)
}
|
||||
|
||||
/**
 * Decodes the values written on the header line of an inline array.
 *
 * @param header - Parsed header; supplies the delimiter and declared length.
 * @param inlineValues - Raw text following the header's colon.
 * @param options - Resolved decode options (strict mode enables the count check).
 * @returns The parsed primitives; an empty array when `inlineValues` is blank.
 * @throws Error in strict mode when the item count differs from `header.length`.
 */
function decodeInlinePrimitiveArray(
  header: ArrayHeaderInfo,
  inlineValues: string,
  options: ResolvedDecodeOptions,
): JsonPrimitive[] {
  const isBlank = inlineValues.trim() === ''
  const primitives: JsonPrimitive[] = isBlank
    ? []
    : parseRowValuesToPrimitives(splitDelimitedValues(inlineValues, header.delimiter))

  assertExpectedCount(primitives.length, header.length, 'inline array items', options)

  return primitives
}
|
||||
|
||||
/**
 * Decodes a list-form array: one list-item line per element at `baseDepth + 1`.
 *
 * At most `header.length` items are read. Reading stops early at the first
 * line that is not a list item at the item depth.
 *
 * @param header - Parsed header; supplies declared length and delimiter.
 * @param cursor - Line cursor positioned at the first item.
 * @param baseDepth - Depth of the array header.
 * @param options - Resolved decode options.
 * @returns The decoded items.
 * @throws Error in strict mode when fewer or more items than declared are present.
 */
function decodeListArray(
  header: ArrayHeaderInfo,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonValue[] {
  const itemDepth = baseDepth + 1
  const isItemLine = (candidate: ParsedLine | undefined): boolean =>
    !!candidate && candidate.depth === itemDepth && candidate.content.startsWith(LIST_ITEM_PREFIX)

  const items: JsonValue[] = []
  while (!cursor.atEnd() && items.length < header.length && isItemLine(cursor.peek())) {
    items.push(decodeListItem(cursor, itemDepth, header.delimiter, options))
  }

  assertExpectedCount(items.length, header.length, 'list array items', options)

  // The loop stops at the declared length, so surplus items need a separate check
  if (options.strict && !cursor.atEnd() && isItemLine(cursor.peek())) {
    throw new Error(`Expected ${header.length} list array items, but found more`)
  }

  return items
}
|
||||
|
||||
/**
 * Decodes a tabular array: each line at `baseDepth + 1` is a delimited row whose
 * values are assigned, in order, to the header's field names.
 *
 * At most `header.length` rows are read. Reading stops early at the first line
 * whose depth is not the row depth.
 *
 * @param header - Parsed header; `fields` must be present.
 * @param cursor - Line cursor positioned at the first row.
 * @param baseDepth - Depth of the array header.
 * @param options - Resolved decode options.
 * @returns One object per row.
 * @throws Error in strict mode on a row with the wrong number of values, on too
 *   few rows, or when a further line at row depth still looks like a data row.
 */
function decodeTabularArray(
  header: ArrayHeaderInfo,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonObject[] {
  const rowDepth = baseDepth + 1
  const fields = header.fields!
  const rows: JsonObject[] = []

  while (!cursor.atEnd() && rows.length < header.length) {
    const line = cursor.peek()
    if (!line || line.depth !== rowDepth) {
      break
    }
    cursor.advance()

    const cells = splitDelimitedValues(line.content, header.delimiter)
    assertExpectedCount(cells.length, fields.length, 'tabular row values', options)

    const primitives = parseRowValuesToPrimitives(cells)
    const row: JsonObject = {}
    fields.forEach((fieldName, column) => {
      row[fieldName] = primitives[column]!
    })
    rows.push(row)
  }

  assertExpectedCount(rows.length, header.length, 'tabular rows', options)

  // Strict mode: a following line at row depth may be a surplus row or the next
  // key-value field. It counts as a row when it has no colon at all, or when
  // the delimiter shows up before the first colon.
  if (options.strict && !cursor.atEnd()) {
    const following = cursor.peek()
    if (following && following.depth === rowDepth && !following.content.startsWith(LIST_ITEM_PREFIX)) {
      const colonPos = following.content.indexOf(COLON)
      const delimiterPos = following.content.indexOf(header.delimiter)
      const looksLikeRow = colonPos === -1 || (delimiterPos !== -1 && delimiterPos < colonPos)
      if (looksLikeRow) {
        throw new Error(`Expected ${header.length} tabular rows, but found more`)
      }
    }
  }

  return rows
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region List item decoding
|
||||
|
||||
/**
 * Consumes one list-item line and decodes the value it introduces.
 *
 * After stripping the list-item prefix, the remainder is tried as:
 * 1. a keyless array header (`[N]...:`) -> nested array,
 * 2. text containing a colon -> object whose first field sits on this line,
 * 3. otherwise -> primitive.
 *
 * @param cursor - Line cursor positioned at the list-item line.
 * @param baseDepth - Depth of the list-item line.
 * @param activeDelimiter - Delimiter of the enclosing array, used as the default
 *   when parsing a nested array header. Typed as the header's delimiter type so
 *   no cast is needed when forwarding it to the header parser.
 * @param options - Resolved decode options.
 * @returns The decoded item.
 * @throws Error when the cursor has no line left.
 */
function decodeListItem(
  cursor: LineCursor,
  baseDepth: Depth,
  activeDelimiter: ArrayHeaderInfo['delimiter'],
  options: ResolvedDecodeOptions,
): JsonValue {
  const line = cursor.next()
  if (!line) {
    throw new Error('Expected list item')
  }

  const afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)

  // Check for array header after hyphen
  if (isArrayHeaderAfterHyphen(afterHyphen)) {
    const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter)
    if (arrayHeader) {
      return decodeArrayFromHeader(arrayHeader.header, line, cursor, baseDepth, options)
    }
    // Looked like a header but did not parse: fall through to the other forms
  }

  // Check for object first field after hyphen
  if (isObjectFirstFieldAfterHyphen(afterHyphen)) {
    return decodeObjectFromListItem(line, cursor, baseDepth, options)
  }

  // Primitive value
  return parsePrimitiveToken(afterHyphen)
}
|
||||
|
||||
/**
 * Decodes an object whose first field shares the line with the list-item prefix
 * and whose remaining fields follow on subsequent lines.
 *
 * @param firstLine - The already-consumed list-item line.
 * @param cursor - Line cursor positioned after `firstLine`.
 * @param baseDepth - Depth of the list-item line.
 * @param options - Resolved decode options.
 * @returns The decoded object.
 */
function decodeObjectFromListItem(
  firstLine: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonObject {
  const firstField = decodeFirstFieldOnHyphen(
    firstLine.content.slice(LIST_ITEM_PREFIX.length),
    cursor,
    baseDepth,
    options,
  )
  const { followDepth } = firstField
  const result: JsonObject = { [firstField.key]: firstField.value }

  // Remaining fields: lines exactly at followDepth that do not start a new list item
  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth !== followDepth || line.content.startsWith(LIST_ITEM_PREFIX)) {
      break
    }

    const [fieldKey, fieldValue] = decodeKeyValuePair(line, cursor, followDepth, options)
    result[fieldKey] = fieldValue
  }

  return result
}
|
||||
|
||||
/**
 * Decodes the first field of a list-item object from the text after the hyphen.
 *
 * The field is either an array (when `rest` parses as an array header), a nested
 * object (when nothing follows the key's colon), or an inline primitive.
 *
 * @param rest - List-item content with the list-item prefix removed.
 * @param cursor - Line cursor positioned after the list-item line.
 * @param baseDepth - Depth of the list-item line.
 * @param options - Resolved decode options.
 * @returns The field's key and value, plus the depth (`baseDepth + 1` in every
 *   branch) at which the object's remaining fields are expected.
 */
function decodeFirstFieldOnHyphen(
  rest: string,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): { key: string, value: JsonValue, followDepth: Depth } {
  const followDepth = baseDepth + 1

  const parsedHeader = parseArrayHeaderLine(rest, DEFAULT_DELIMITER)
  if (parsedHeader) {
    // The array decoder wants a line object; fabricate one holding only the header text
    const headerLine: ParsedLine = {
      raw: rest,
      content: rest,
      indent: baseDepth * options.indent,
      depth: baseDepth,
    }
    const arrayValue = decodeArrayFromHeader(parsedHeader.header, headerLine, cursor, baseDepth, options)
    return { key: parsedHeader.header.key!, value: arrayValue, followDepth }
  }

  const { key, end } = parseKeyToken(rest, 0)
  const inlineText = rest.slice(end).trim()

  // Empty remainder -> children on deeper lines; otherwise an inline primitive
  const value = inlineText
    ? parsePrimitiveToken(inlineText)
    : expectNestedObject(cursor, followDepth, options)

  return { key, value, followDepth }
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Validation
|
||||
|
||||
/**
 * Strict-mode guard: throws when `actual` differs from `expected`.
 * Does nothing when `options.strict` is false.
 *
 * @param actual - Number of items found.
 * @param expected - Number of items declared.
 * @param what - Noun phrase inserted into the error message.
 * @param options - Resolved decode options; only `strict` is read.
 * @throws Error with message `Expected <expected> <what>, but got <actual>`.
 */
function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void {
  if (!options.strict) {
    return
  }
  if (actual === expected) {
    return
  }
  throw new Error(`Expected ${expected} ${what}, but got ${actual}`)
}
|
||||
|
||||
// #endregion
|
||||
26
src/index.ts
26
src/index.ts
@@ -1,13 +1,19 @@
|
||||
import type {
|
||||
DecodeOptions,
|
||||
EncodeOptions,
|
||||
JsonValue,
|
||||
ResolvedDecodeOptions,
|
||||
ResolvedEncodeOptions,
|
||||
} from './types'
|
||||
import { DEFAULT_DELIMITER } from './constants'
|
||||
import { decodeValueFromLines } from './decoders'
|
||||
import { encodeValue } from './encoders'
|
||||
import { normalizeValue } from './normalize'
|
||||
import { LineCursor, toParsedLines } from './scanner'
|
||||
|
||||
export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
|
||||
export type {
|
||||
DecodeOptions,
|
||||
Delimiter,
|
||||
DelimiterKey,
|
||||
EncodeOptions,
|
||||
@@ -15,6 +21,7 @@ export type {
|
||||
JsonObject,
|
||||
JsonPrimitive,
|
||||
JsonValue,
|
||||
ResolvedDecodeOptions,
|
||||
ResolvedEncodeOptions,
|
||||
} from './types'
|
||||
|
||||
@@ -24,6 +31,18 @@ export function encode(input: unknown, options?: EncodeOptions): string {
|
||||
return encodeValue(normalizedValue, resolvedOptions)
|
||||
}
|
||||
|
||||
/**
 * Decodes a TOON-formatted string into a JSON value.
 *
 * @param input - Source text to decode.
 * @param options - Optional decode settings; missing fields fall back to defaults.
 * @returns The decoded value.
 * @throws Error when the input yields no lines to decode.
 */
export function decode(input: string, options?: DecodeOptions): JsonValue {
  const resolvedOptions = resolveDecodeOptions(options)
  const parsedLines = toParsedLines(input, resolvedOptions.indent)

  if (!parsedLines.length) {
    throw new Error('Cannot decode empty input')
  }

  return decodeValueFromLines(new LineCursor(parsedLines), resolvedOptions)
}
|
||||
|
||||
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
||||
return {
|
||||
indent: options?.indent ?? 2,
|
||||
@@ -31,3 +50,10 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
|
||||
lengthMarker: options?.lengthMarker ?? false,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fills in decode defaults: `indent` falls back to 2 and `strict` to true
 * whenever the caller leaves them unset.
 */
function resolveDecodeOptions(options?: DecodeOptions): ResolvedDecodeOptions {
  const indent = options?.indent ?? 2
  const strict = options?.strict ?? true
  return { indent, strict }
}
|
||||
|
||||
393
src/parser.ts
Normal file
393
src/parser.ts
Normal file
@@ -0,0 +1,393 @@
|
||||
import type {
|
||||
ArrayHeaderInfo,
|
||||
Delimiter,
|
||||
JsonPrimitive,
|
||||
} from './types'
|
||||
import {
|
||||
BACKSLASH,
|
||||
CARRIAGE_RETURN,
|
||||
CLOSE_BRACE,
|
||||
CLOSE_BRACKET,
|
||||
COLON,
|
||||
DELIMITERS,
|
||||
DOUBLE_QUOTE,
|
||||
FALSE_LITERAL,
|
||||
HASH,
|
||||
NEWLINE,
|
||||
NULL_LITERAL,
|
||||
OPEN_BRACE,
|
||||
OPEN_BRACKET,
|
||||
PIPE,
|
||||
TAB,
|
||||
TRUE_LITERAL,
|
||||
} from './constants'
|
||||
|
||||
// #region Array header parsing
|
||||
|
||||
/**
 * Attempts to parse a line as an array header of the form
 * `key[<length>]{<fields>}: <inline values>`, where the key, the `{...}` field
 * list and the inline values are each optional.
 *
 * The line is rejected (returns `undefined`) when:
 * - it starts with a double quote (quoted keys are handled as ordinary keys),
 * - it has no `[` ... `]` pair,
 * - a colon appears before the opening bracket. In that case the bracket
 *   belongs to the value of an ordinary `key: value` line, e.g.
 *   `note: "see [1]: details"`, and must not be read as a header,
 * - no colon follows the bracket/brace segments,
 * - the bracket content is not a valid length segment.
 *
 * @param content - Line content without leading indentation.
 * @param defaultDelimiter - Delimiter assumed when the bracket segment names none.
 * @returns The parsed header plus any non-empty text after the colon, or
 *   `undefined` when the line is not an array header.
 */
export function parseArrayHeaderLine(
  content: string,
  defaultDelimiter: Delimiter,
): { header: ArrayHeaderInfo, inlineValues?: string } | undefined {
  // Don't match if the line starts with a quote (it's a quoted key, not an array)
  if (content.trimStart().startsWith(DOUBLE_QUOTE)) {
    return undefined
  }

  // Find the bracket segment first
  const bracketStart = content.indexOf(OPEN_BRACKET)
  if (bracketStart === -1) {
    return undefined
  }

  // A colon ahead of the bracket terminates a plain key, so whatever brackets
  // follow are part of that key's value rather than a header
  const firstColon = content.indexOf(COLON)
  if (firstColon !== -1 && firstColon < bracketStart) {
    return undefined
  }

  const bracketEnd = content.indexOf(CLOSE_BRACKET, bracketStart)
  if (bracketEnd === -1) {
    return undefined
  }

  // An optional `{...}` fields segment may sit between the bracket and the colon;
  // when present, the header's colon has to be searched after its closing brace
  let braceEnd = bracketEnd + 1
  const braceStart = content.indexOf(OPEN_BRACE, bracketEnd)
  if (braceStart !== -1 && braceStart < content.indexOf(COLON, bracketEnd)) {
    const foundBraceEnd = content.indexOf(CLOSE_BRACE, braceStart)
    if (foundBraceEnd !== -1) {
      braceEnd = foundBraceEnd + 1
    }
  }

  // Now find colon after brackets and braces
  const colonIndex = content.indexOf(COLON, Math.max(bracketEnd, braceEnd))
  if (colonIndex === -1) {
    return undefined
  }

  const key = bracketStart > 0 ? content.slice(0, bracketStart) : undefined
  const afterColon = content.slice(colonIndex + 1).trim()
  const bracketContent = content.slice(bracketStart + 1, bracketEnd)

  // An unparsable bracket segment means this is not a header at all
  let parsedBracket
  try {
    parsedBracket = parseBracketSegment(bracketContent, defaultDelimiter)
  }
  catch {
    return undefined
  }

  const { length, delimiter, hasLengthMarker } = parsedBracket

  // Field names are split with the delimiter declared in the bracket segment
  let fields: string[] | undefined
  if (braceStart !== -1 && braceStart < colonIndex) {
    const foundBraceEnd = content.indexOf(CLOSE_BRACE, braceStart)
    if (foundBraceEnd !== -1 && foundBraceEnd < colonIndex) {
      const fieldsContent = content.slice(braceStart + 1, foundBraceEnd)
      fields = parseFieldsSegment(fieldsContent, delimiter)
    }
  }

  return {
    header: {
      key,
      length,
      delimiter,
      fields,
      hasLengthMarker,
    },
    inlineValues: afterColon || undefined,
  }
}
|
||||
|
||||
/**
 * Parses the text between `[` and `]` of an array header.
 *
 * Layout: optional leading length marker, a decimal length, and an optional
 * trailing tab or pipe character that selects the delimiter.
 *
 * The length must consist of digits only. Relying on `Number.parseInt` alone
 * would silently accept malformed segments such as `3abc`, `1.5` or `-2`.
 *
 * @param seg - Bracket content without the brackets.
 * @param defaultDelimiter - Delimiter used when no suffix is present.
 * @returns The declared length, the effective delimiter, and whether the length
 *   marker was present.
 * @throws TypeError when the length part is not a non-negative decimal integer.
 */
export function parseBracketSegment(
  seg: string,
  defaultDelimiter: Delimiter,
): { length: number, delimiter: Delimiter, hasLengthMarker: boolean } {
  let hasLengthMarker = false
  let content = seg

  // Check for length marker
  if (content.startsWith(HASH)) {
    hasLengthMarker = true
    content = content.slice(1)
  }

  // Check for delimiter suffix
  let delimiter = defaultDelimiter
  if (content.endsWith(TAB)) {
    delimiter = DELIMITERS.tab
    content = content.slice(0, -1)
  }
  else if (content.endsWith(PIPE)) {
    delimiter = DELIMITERS.pipe
    content = content.slice(0, -1)
  }

  // Whatever remains must be the bare length
  if (!/^\d+$/.test(content)) {
    throw new TypeError(`Invalid array length: ${seg}`)
  }

  const length = Number.parseInt(content, 10)

  return { length, delimiter, hasLengthMarker }
}
|
||||
|
||||
/**
 * Splits the content of a `{...}` fields segment on `delimiter` and converts
 * each entry to a field name, unquoting entries written as string literals.
 */
export function parseFieldsSegment(seg: string, delimiter: Delimiter): string[] {
  const names: string[] = []
  for (const rawField of splitDelimitedValues(seg, delimiter)) {
    names.push(parseStringLiteral(rawField.trim()))
  }
  return names
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Delimited value parsing
|
||||
|
||||
/**
 * Splits `input` on `delimiter`, ignoring delimiters inside double-quoted
 * sections. Quotes and escape sequences are kept verbatim in the output;
 * each piece is whitespace-trimmed.
 *
 * @param input - Delimited text (a table row, inline array values, field list).
 * @param delimiter - The separator character.
 * @returns The trimmed pieces. Empty input yields an empty array, while a
 *   trailing delimiter yields a trailing empty string.
 */
export function splitDelimitedValues(input: string, delimiter: Delimiter): string[] {
  const pieces: string[] = []
  let buffer = ''
  let quoted = false

  for (let pos = 0; pos < input.length; pos++) {
    const ch = input[pos]

    if (quoted && ch === BACKSLASH && pos + 1 < input.length) {
      // Keep the escape pair intact so an escaped quote cannot end the quoted section
      buffer += ch + input[pos + 1]
      pos++
    }
    else if (ch === DOUBLE_QUOTE) {
      quoted = !quoted
      buffer += ch
    }
    else if (!quoted && ch === delimiter) {
      pieces.push(buffer.trim())
      buffer = ''
    }
    else {
      buffer += ch
    }
  }

  // Flush the tail; skipped only when nothing at all was read
  if (buffer !== '' || pieces.length > 0) {
    pieces.push(buffer.trim())
  }

  return pieces
}
|
||||
|
||||
/**
 * Converts each raw token of a row into its primitive value, preserving order.
 */
export function parseRowValuesToPrimitives(values: string[]): JsonPrimitive[] {
  const primitives: JsonPrimitive[] = []
  for (const token of values) {
    primitives.push(parsePrimitiveToken(token))
  }
  return primitives
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Primitive and key parsing
|
||||
|
||||
/**
 * Converts a raw token into a JSON primitive.
 *
 * After trimming: an empty token becomes `''`; a token starting with a double
 * quote is parsed as a string literal (and must be well-formed); the boolean
 * and null literals map to `true`, `false` and `null`; a numeric literal
 * becomes a number; anything else is returned as an unquoted string.
 *
 * @param token - Raw token text.
 * @returns The decoded primitive.
 * @throws Error when a quoted token is malformed.
 */
export function parsePrimitiveToken(token: string): JsonPrimitive {
  const text = token.trim()

  if (text === '') {
    return ''
  }

  if (text.startsWith(DOUBLE_QUOTE)) {
    return parseStringLiteral(text)
  }

  switch (text) {
    case TRUE_LITERAL:
      return true
    case FALSE_LITERAL:
      return false
    case NULL_LITERAL:
      return null
  }

  return isNumericLiteral(text) ? Number.parseFloat(text) : text
}
|
||||
|
||||
/**
 * True when `token` is exactly one of the true, false or null literals.
 */
export function isBooleanOrNullLiteral(token: string): boolean {
  const literals: string[] = [TRUE_LITERAL, FALSE_LITERAL, NULL_LITERAL]
  return literals.includes(token)
}
|
||||
|
||||
/**
 * Decides whether an unquoted token should be read as a number.
 *
 * A token qualifies when `Number(token)` is finite and the token does not
 * begin with a `0` followed by anything other than `.` (so `0` and `0.5`
 * qualify, while `05` and `0x10` do not).
 *
 * @param token - Token text, expected to be trimmed already.
 */
export function isNumericLiteral(token: string): boolean {
  if (token.length === 0) {
    return false
  }

  // Leading zero is allowed only for "0" itself and for decimals such as "0.5"
  const hasLeadingZero = token.length > 1 && token.startsWith('0') && token[1] !== '.'
  if (hasLeadingZero) {
    return false
  }

  // Number.isFinite is false for NaN as well as for +/-Infinity
  return Number.isFinite(Number(token))
}
|
||||
|
||||
/**
 * Parses a token that may be a double-quoted string literal.
 *
 * A token that does not start with a double quote is returned trimmed and
 * otherwise untouched. A quoted token must end exactly at its closing quote;
 * its inner text is unescaped.
 *
 * @param token - Raw token text.
 * @returns The string value.
 * @throws Error when text follows the closing quote, when the closing quote is
 *   missing, or when the content holds an invalid escape sequence.
 */
export function parseStringLiteral(token: string): string {
  const text = token.trim()

  if (!text.startsWith(DOUBLE_QUOTE)) {
    return text
  }

  for (let pos = 1; pos < text.length; pos++) {
    const ch = text[pos]

    if (ch === BACKSLASH && pos + 1 < text.length) {
      pos++ // step over the escaped character so an escaped quote is not a terminator
      continue
    }
    if (ch !== DOUBLE_QUOTE) {
      continue
    }

    // Closing quote found: it has to be the very last character
    if (pos !== text.length - 1) {
      throw new Error('Unexpected characters after closing quote')
    }
    return unescapeString(text.slice(1, pos))
  }

  throw new Error('Unterminated string: missing closing quote')
}
|
||||
|
||||
/**
 * Resolves backslash escape sequences in the content of a quoted string.
 *
 * Recognized sequences: `\n`, `\t`, `\r`, an escaped backslash and an escaped
 * double quote. Every other character is copied through unchanged.
 *
 * @param value - Text between the quotes, escapes still in place.
 * @returns The unescaped text.
 * @throws Error on a trailing lone backslash or an unrecognized escape.
 */
export function unescapeString(value: string): string {
  const escapes = new Map<string, string>([
    ['n', NEWLINE],
    ['t', TAB],
    ['r', CARRIAGE_RETURN],
    [BACKSLASH, BACKSLASH],
    [DOUBLE_QUOTE, DOUBLE_QUOTE],
  ])

  let output = ''

  for (let pos = 0; pos < value.length; pos++) {
    const ch = value[pos]
    if (ch !== BACKSLASH) {
      output += ch
      continue
    }

    if (pos + 1 >= value.length) {
      throw new Error('Invalid escape sequence: backslash at end of string')
    }

    const code = value[pos + 1]!
    const replacement = escapes.get(code)
    if (replacement === undefined) {
      throw new Error(`Invalid escape sequence: \\${code}`)
    }

    output += replacement
    pos++ // the escape consumed two characters
  }

  return output
}
|
||||
|
||||
/**
 * Reads an unquoted key: everything from `start` up to the next colon, trimmed.
 *
 * @param content - Line content.
 * @param start - Index at which the key begins.
 * @returns The key and the index just past its colon.
 * @throws Error when no colon is found at or after `start`.
 */
export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
  const colonPos = content.indexOf(COLON, start)
  if (colonPos === -1) {
    throw new Error('Missing colon after key')
  }

  return {
    key: content.slice(start, colonPos).trim(),
    end: colonPos + 1, // first index after the colon
  }
}
|
||||
|
||||
/**
 * Reads a double-quoted key beginning at `start` (the opening quote) and the
 * colon that must directly follow its closing quote.
 *
 * @param content - Line content.
 * @param start - Index of the opening quote.
 * @returns The unescaped key and the index just past the colon.
 * @throws Error when the closing quote is missing, when the key holds an
 *   invalid escape, or when no colon follows the closing quote.
 */
export function parseQuotedKey(content: string, start: number): { key: string, end: number } {
  const bodyStart = start + 1 // first character after the opening quote

  for (let pos = bodyStart; pos < content.length; pos++) {
    const ch = content[pos]

    if (ch === BACKSLASH && pos + 1 < content.length) {
      pos++ // keep escape pairs together so an escaped quote does not close the key
      continue
    }
    if (ch !== DOUBLE_QUOTE) {
      continue
    }

    // Closing quote: unescape the body, then demand the colon right after it
    const key = unescapeString(content.slice(bodyStart, pos))
    const colonPos = pos + 1
    if (content[colonPos] !== COLON) {
      throw new Error('Missing colon after key')
    }
    return { key, end: colonPos + 1 }
  }

  throw new Error('Unterminated quoted key')
}
|
||||
|
||||
/**
 * Parses the key beginning at `start`, choosing the quoted or unquoted key
 * parser depending on whether the character at `start` is a double quote.
 *
 * @returns The key and the index just past its colon.
 */
export function parseKeyToken(content: string, start: number): { key: string, end: number } {
  const isQuoted = content[start] === DOUBLE_QUOTE
  return isQuoted
    ? parseQuotedKey(content, start)
    : parseUnquotedKey(content, start)
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Array content detection helpers
|
||||
|
||||
/**
 * Cheap check that list-item content looks like a keyless array header:
 * it starts (after whitespace) with an opening bracket and contains a colon.
 */
export function isArrayHeaderAfterHyphen(content: string): boolean {
  if (!content.trim().startsWith(OPEN_BRACKET)) {
    return false
  }
  return content.includes(COLON)
}
|
||||
|
||||
/**
 * Cheap check that list-item content could be an object's first field:
 * it only requires a colon somewhere in the text.
 */
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
  return content.indexOf(COLON) !== -1
}
|
||||
|
||||
// #endregion
|
||||
63
src/scanner.ts
Normal file
63
src/scanner.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import type { Depth, ParsedLine } from './types'
|
||||
import { SPACE } from './constants'
|
||||
|
||||
/**
 * Forward-only cursor over a list of parsed lines.
 */
export class LineCursor {
  /** Index of the next line to be read. */
  private index = 0

  constructor(private lines: ParsedLine[]) {}

  /** Total number of lines, regardless of the current position. */
  get length(): number {
    return this.lines.length
  }

  /** True once every line has been consumed. */
  atEnd(): boolean {
    return this.index >= this.lines.length
  }

  /** Returns the next line without consuming it, or `undefined` at the end. */
  peek(): ParsedLine | undefined {
    return this.lines[this.index]
  }

  /** Returns the most recently consumed line, or `undefined` if none was consumed. */
  current(): ParsedLine | undefined {
    if (this.index <= 0) {
      return undefined
    }
    return this.lines[this.index - 1]
  }

  /** Moves one line forward without returning it. */
  advance(): void {
    this.index += 1
  }

  /** Returns the next line and moves past it; the position advances even at the end. */
  next(): ParsedLine | undefined {
    const line = this.lines[this.index]
    this.index += 1
    return line
  }
}
|
||||
|
||||
/**
 * Splits source text into lines and records each line's indentation.
 *
 * Whitespace-only lines are skipped, including the empty tail produced by a
 * trailing newline. Otherwise they would reach the decoder as depth-0 lines
 * with empty content and fail as malformed key-value lines. Both `\n` and
 * `\r\n` line endings are accepted, so no carriage return leaks into `content`.
 *
 * @param source - Text to scan.
 * @param indentSize - Number of spaces that make up one depth level.
 * @returns One entry per non-blank line, in source order; an empty array when
 *   the source is empty or whitespace-only.
 */
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
  if (!source.trim()) {
    return []
  }

  const parsed: ParsedLine[] = []

  for (const raw of source.split(/\r?\n/)) {
    // Blank lines carry no content and would otherwise be decoded as bogus fields
    if (!raw.trim()) {
      continue
    }

    // Only leading spaces count as indentation
    let indent = 0
    while (indent < raw.length && raw[indent] === SPACE) {
      indent++
    }

    const content = raw.slice(indent)
    const depth = computeDepthFromIndent(indent, indentSize)

    parsed.push({ raw, indent, content, depth })
  }

  return parsed
}
|
||||
|
||||
/**
 * Converts a count of leading spaces into a depth level by dividing by the
 * indent size and rounding down, so partial indentation belongs to the
 * shallower level.
 */
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
  const levels = indentSpaces / indentSize
  return Math.floor(levels)
}
|
||||
38
src/types.ts
38
src/types.ts
@@ -36,4 +36,42 @@ export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>>
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Decoder options
|
||||
|
||||
/**
 * Caller-facing settings for decoding. Every field is optional.
 */
export interface DecodeOptions {
  /**
   * How many spaces make up one level of indentation.
   * @default 2
   */
  indent?: number
  /**
   * Whether declared array lengths and tabular row/value counts are enforced;
   * a mismatch raises an error when enabled.
   * @default true
   */
  strict?: boolean
}
|
||||
|
||||
/** Decode options with every field required and read-only. */
export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Decoder parsing types
|
||||
|
||||
/**
 * Information extracted from an array header line.
 */
export interface ArrayHeaderInfo {
  /** Text preceding the opening bracket; absent when the line starts with the bracket. */
  key?: string
  /** Length declared inside the brackets. */
  length: number
  /** Delimiter for values, rows and field names of this array. */
  delimiter: Delimiter
  /** Field names from the optional `{...}` segment; present for tabular arrays. */
  fields?: string[]
  /** Whether the bracket segment began with the length marker. */
  hasLengthMarker: boolean
}
|
||||
|
||||
/**
 * One source line together with its indentation information.
 */
export interface ParsedLine {
  /** The line as it appeared in the source, indentation included. */
  raw: string
  /** Number of leading spaces. */
  indent: number
  /** The line with its leading spaces removed. */
  content: string
  /** Nesting level derived from `indent` and the configured indent size. */
  depth: Depth
}
|
||||
|
||||
// #endregion
|
||||
|
||||
/** Nesting level of a line; the scanner derives it from leading spaces divided by the indent size, rounded down. */
export type Depth = number
|
||||
|
||||
Reference in New Issue
Block a user