feat: decode method (#10)

This commit is contained in:
Johann Schopplich
2025-10-29 07:42:15 +01:00
parent 7db91398fe
commit 45604b06e8
11 changed files with 1501 additions and 21 deletions

View File

@@ -11,6 +11,7 @@ export const COMMA = ','
// Separates a key (or an array header) from the value that follows it
export const COLON = ':'
// Space character; leading spaces are what the line scanner counts as indentation
export const SPACE = ' '
// Pipe character; recognized as a delimiter marker at the end of an array header's bracket segment
export const PIPE = '|'
// Hash character; recognized as the length-marker prefix inside an array header's bracket segment
export const HASH = '#'
// #endregion

419
src/decoders.ts Normal file
View File

@@ -0,0 +1,419 @@
import type { LineCursor } from './scanner'
import type {
ArrayHeaderInfo,
Depth,
JsonArray,
JsonObject,
JsonPrimitive,
JsonValue,
ParsedLine,
ResolvedDecodeOptions,
} from './types'
import {
COLON,
DEFAULT_DELIMITER,
LIST_ITEM_PREFIX,
} from './constants'
import {
isArrayHeaderAfterHyphen,
isObjectFirstFieldAfterHyphen,
parseArrayHeaderLine,
parseKeyToken,
parsePrimitiveToken,
parseRowValuesToPrimitives,
splitDelimitedValues,
} from './parser'
// #region Entry decoding
/**
 * Decodes the whole document held by `cursor` into a JSON value.
 *
 * The first line decides the root shape, tried in this order:
 * 1. a root array header (`[N]...:`) yields an array,
 * 2. a document with exactly one line that is not a key/value line yields a primitive,
 * 3. anything else is decoded as an object at depth 0.
 *
 * @param cursor Cursor positioned on the first line of the document.
 * @param options Resolved decoder options, forwarded to the nested decoders.
 * @returns The decoded root value.
 * @throws {Error} When the cursor has no line to read.
 */
export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecodeOptions): JsonValue {
  const firstLine = cursor.peek()
  if (!firstLine) {
    throw new Error('No content to decode')
  }

  // Root array: only attempt a full header parse when the cheap shape check passes
  const rootHeader = isRootArrayHeaderLine(firstLine)
    ? parseArrayHeaderLine(firstLine.content, DEFAULT_DELIMITER)
    : undefined
  if (rootHeader) {
    // Consume the header line; array content (if any) starts on the next line
    cursor.advance()
    return decodeArrayFromHeader(rootHeader.header, firstLine, cursor, 0, options)
  }

  // A lone line without a key is a bare primitive
  const isLonePrimitive = cursor.length === 1 && !isKeyValueLine(firstLine)
  return isLonePrimitive
    ? parsePrimitiveToken(firstLine.content.trim())
    : decodeObject(cursor, 0, options)
}
/**
 * Cheap pre-check for a root array header: the trimmed line must begin with `[`
 * and contain a colon somewhere. A `true` result still needs a full header parse.
 */
function isRootArrayHeaderLine(line: ParsedLine): boolean {
  const trimmed = line.content.trim()
  if (!trimmed.startsWith('[')) {
    return false
  }
  return trimmed.includes(COLON)
}
/**
 * Reports whether a line looks like `key: ...`.
 *
 * - Line starting with a double quote: the closing quote of the key (escape
 *   pairs are skipped) must be immediately followed by a colon.
 * - Any other line: it is enough that the line contains a colon anywhere.
 */
function isKeyValueLine(line: ParsedLine): boolean {
  const content = line.content

  // Unquoted key: any colon on the line counts
  if (!content.startsWith('"')) {
    return content.includes(COLON)
  }

  // Quoted key: walk to the closing quote, stepping over backslash escapes
  for (let pos = 1; pos < content.length; pos++) {
    const ch = content[pos]
    if (ch === '\\' && pos + 1 < content.length) {
      // Skip the escaped character as well (the loop adds the second step)
      pos++
      continue
    }
    if (ch === '"') {
      return content[pos + 1] === COLON
    }
  }

  // No closing quote found
  return false
}
// #endregion
// #region Object decoding
/**
 * Decodes consecutive `key: value` lines at exactly `baseDepth` into an object.
 *
 * Reading stops at the first line whose depth differs from `baseDepth`
 * (shallower lines belong to an ancestor; deeper lines are left unconsumed).
 *
 * Keys are stored with `Object.defineProperty` rather than plain assignment so
 * that a key named `__proto__` becomes an ordinary own property (as `JSON.parse`
 * does) instead of replacing the object's prototype.
 *
 * @param cursor Cursor positioned on the first candidate line.
 * @param baseDepth Depth every field of this object must have.
 * @param options Resolved decoder options, forwarded to nested decoders.
 * @returns The decoded object; later duplicates of a key overwrite earlier ones.
 */
function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  const obj: JsonObject = {}

  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth !== baseDepth) {
      break
    }

    const [key, value] = decodeKeyValuePair(line, cursor, baseDepth, options)
    // Define (or redefine) an own data property; never triggers the `__proto__` setter
    Object.defineProperty(obj, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    })
  }

  return obj
}
/**
 * Decodes one object field starting at `line` and consumes every line that belongs to it.
 *
 * The value is, in order of precedence:
 * - an array, when the line parses as an array header that carries a key,
 * - a primitive, when text follows the key's colon,
 * - a nested object, when nothing follows the colon and the next line is deeper,
 * - an empty object otherwise.
 *
 * @param line The field's own line (the one the cursor currently points at).
 * @param cursor Cursor; advanced past `line` and past any nested content.
 * @param baseDepth Depth of the field's line.
 * @param options Resolved decoder options.
 * @returns The `[key, value]` tuple for the field.
 */
function decodeKeyValuePair(
  line: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): [key: string, value: JsonValue] {
  // Consume the field's own line up front
  cursor.advance()

  // Keyed array header takes precedence over plain key parsing
  const parsedHeader = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER)
  const arrayKey = parsedHeader?.header.key
  if (parsedHeader && arrayKey) {
    const items = decodeArrayFromHeader(parsedHeader.header, line, cursor, baseDepth, options)
    return [arrayKey, items]
  }

  const { key, end } = parseKeyToken(line.content, 0)
  const inlineText = line.content.slice(end).trim()

  // Text after the colon is an inline primitive
  if (inlineText) {
    return [key, parsePrimitiveToken(inlineText)]
  }

  // Nothing after the colon: nested object if the following line is deeper
  const following = cursor.peek()
  if (following && following.depth > baseDepth) {
    return [key, expectNestedObject(cursor, baseDepth + 1, options)]
  }

  // Otherwise the field is an empty object
  return [key, {}]
}
/**
 * Decodes the object whose fields sit at `nestedDepth`; a thin wrapper around `decodeObject`.
 */
function expectNestedObject(cursor: LineCursor, nestedDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  const nested = decodeObject(cursor, nestedDepth, options)
  return nested
}
// #endregion
// #region Array decoding
/**
 * Decodes the array announced by an already-parsed header.
 *
 * The header line is parsed once more here, only to recover its inline values
 * (the text after the colon). Every caller in this module has already moved the
 * cursor past the header line, so multi-line content is read from the cursor's
 * current position.
 *
 * Dispatch:
 * - inline values present → inline primitive array,
 * - header declares fields → tabular array,
 * - otherwise → list array.
 *
 * @param header Parsed header describing length, delimiter and fields.
 * @param line Line the header was read from.
 * @param cursor Cursor positioned after the header line.
 * @param baseDepth Depth of the header; array content is expected one level deeper.
 * @param options Resolved decoder options.
 * @throws {Error} When `line.content` does not parse as an array header.
 */
function decodeArrayFromHeader(
  header: ArrayHeaderInfo,
  line: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonArray {
  const reparsed = parseArrayHeaderLine(line.content, DEFAULT_DELIMITER)
  if (!reparsed) {
    throw new Error('Invalid array header')
  }

  // Values on the header line itself: nothing more to read from the cursor
  const { inlineValues } = reparsed
  if (inlineValues) {
    return decodeInlinePrimitiveArray(header, inlineValues, options)
  }

  // Multi-line content: rows when the header names fields, list items otherwise
  const isTabular = (header.fields?.length ?? 0) > 0
  return isTabular
    ? decodeTabularArray(header, cursor, baseDepth, options)
    : decodeListArray(header, cursor, baseDepth, options)
}
/**
 * Decodes the values written on the header line itself (`key[N]: a,b,c`).
 *
 * Blank inline text yields an empty array. In strict mode the number of
 * decoded values must equal the header's declared length.
 *
 * @param header Parsed header supplying the delimiter and the declared length.
 * @param inlineValues Raw text that followed the header's colon.
 * @param options Resolved decoder options (only `strict` matters here).
 */
function decodeInlinePrimitiveArray(
  header: ArrayHeaderInfo,
  inlineValues: string,
  options: ResolvedDecodeOptions,
): JsonPrimitive[] {
  const primitives: JsonPrimitive[] = inlineValues.trim() === ''
    ? []
    : parseRowValuesToPrimitives(splitDelimitedValues(inlineValues, header.delimiter))

  assertExpectedCount(primitives.length, header.length, 'inline array items', options)
  return primitives
}
/**
 * Decodes a list-style array: one item per line at `baseDepth + 1`, each line
 * starting with the list item prefix.
 *
 * At most `header.length` items are read. In strict mode the item count must
 * match the declared length, and a further item line directly after the last
 * one read is rejected.
 *
 * @param header Parsed header supplying the declared length and delimiter.
 * @param cursor Cursor positioned on the first candidate item line.
 * @param baseDepth Depth of the array header.
 * @param options Resolved decoder options.
 * @throws {Error} In strict mode, on a count mismatch or surplus items.
 */
function decodeListArray(
  header: ArrayHeaderInfo,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonValue[] {
  const itemDepth = baseDepth + 1
  const items: JsonValue[] = []

  // True when the candidate is a list item line belonging to this array
  const isItemLine = (candidate: ParsedLine | undefined): boolean =>
    !!candidate
    && candidate.depth === itemDepth
    && candidate.content.startsWith(LIST_ITEM_PREFIX)

  while (!cursor.atEnd() && items.length < header.length && isItemLine(cursor.peek())) {
    items.push(decodeListItem(cursor, itemDepth, header.delimiter, options))
  }

  assertExpectedCount(items.length, header.length, 'list array items', options)

  // Strict mode: another item line right here means the header under-declared
  if (options.strict && !cursor.atEnd() && isItemLine(cursor.peek())) {
    throw new Error(`Expected ${header.length} list array items, but found more`)
  }

  return items
}
/**
 * Decodes a tabular array: each line at `baseDepth + 1` is one row whose
 * delimited values are paired positionally with the header's field names.
 *
 * At most `header.length` rows are read. In strict mode each row must have as
 * many values as there are fields, the row count must match the declared
 * length, and a following line that still looks like a data row is rejected.
 *
 * Row properties are created with `Object.defineProperty` so that a field named
 * `__proto__` is stored as an ordinary own property instead of being dropped or
 * altering the row object's prototype.
 *
 * @param header Parsed header; `fields` supplies the column names.
 * @param cursor Cursor positioned on the first candidate row.
 * @param baseDepth Depth of the array header.
 * @param options Resolved decoder options.
 * @throws {Error} In strict mode, on width/count mismatches or surplus rows.
 */
function decodeTabularArray(
  header: ArrayHeaderInfo,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonObject[] {
  const objects: JsonObject[] = []
  const rowDepth = baseDepth + 1
  // The dispatcher only routes here when the header declares fields
  const fields = header.fields ?? []

  while (!cursor.atEnd() && objects.length < header.length) {
    const line = cursor.peek()
    if (!line || line.depth !== rowDepth) {
      break
    }
    cursor.advance()

    const values = splitDelimitedValues(line.content, header.delimiter)
    assertExpectedCount(values.length, fields.length, 'tabular row values', options)
    const primitives = parseRowValuesToPrimitives(values)

    const obj: JsonObject = {}
    fields.forEach((field, column) => {
      // Own data property; safe for any field name, including `__proto__`
      Object.defineProperty(obj, field, {
        value: primitives[column]!,
        writable: true,
        enumerable: true,
        configurable: true,
      })
    })
    objects.push(obj)
  }

  assertExpectedCount(objects.length, header.length, 'tabular rows', options)

  // Strict mode: detect a surplus data row directly after the declared rows
  if (options.strict && !cursor.atEnd()) {
    const nextLine = cursor.peek()
    if (nextLine && nextLine.depth === rowDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
      // A key/value line has a colon that precedes any delimiter.
      // A data row has no colon at all, or a delimiter before its first colon.
      const colonPos = nextLine.content.indexOf(COLON)
      const delimiterPos = nextLine.content.indexOf(header.delimiter)
      const looksLikeRow = colonPos === -1 || (delimiterPos !== -1 && delimiterPos < colonPos)
      if (looksLikeRow) {
        throw new Error(`Expected ${header.length} tabular rows, but found more`)
      }
    }
  }

  return objects
}
// #endregion
// #region List item decoding
/**
 * Consumes one list item line and decodes the value it introduces.
 *
 * After stripping the list item prefix, the remainder is interpreted as:
 * - a keyless array header (`[N]...:`) → nested array,
 * - text containing a colon → object whose first field sits on this line,
 * - anything else → primitive.
 *
 * @param cursor Cursor positioned on the item line; it is consumed here.
 * @param baseDepth Depth of the item line.
 * @param activeDelimiter Delimiter of the enclosing array, used as the default for a nested header.
 * @param options Resolved decoder options.
 * @throws {Error} When the cursor has no line left.
 */
function decodeListItem(
  cursor: LineCursor,
  baseDepth: Depth,
  activeDelimiter: string,
  options: ResolvedDecodeOptions,
): JsonValue {
  const itemLine = cursor.next()
  if (!itemLine) {
    throw new Error('Expected list item')
  }

  const body = itemLine.content.slice(LIST_ITEM_PREFIX.length)

  // Nested array directly after the hyphen
  if (isArrayHeaderAfterHyphen(body)) {
    const nestedHeader = parseArrayHeaderLine(
      body,
      activeDelimiter as Parameters<typeof parseArrayHeaderLine>[1],
    )
    if (nestedHeader) {
      return decodeArrayFromHeader(nestedHeader.header, itemLine, cursor, baseDepth, options)
    }
  }

  // Object whose first field shares the hyphen line, else a plain primitive
  return isObjectFirstFieldAfterHyphen(body)
    ? decodeObjectFromListItem(itemLine, cursor, baseDepth, options)
    : parsePrimitiveToken(body)
}
/**
 * Decodes an object that starts on a list item line (`- key: value`) and
 * continues on following lines.
 *
 * The first field comes from the hyphen line itself. Further fields are read
 * from subsequent lines at the depth reported by `decodeFirstFieldOnHyphen`,
 * stopping at the first line that is shallower, deeper, or a new list item.
 *
 * Fields are stored with `Object.defineProperty` so that a key named
 * `__proto__` becomes an ordinary own property instead of replacing the
 * object's prototype (which plain assignment would do).
 *
 * @param firstLine The list item line, still including its prefix.
 * @param cursor Cursor positioned after `firstLine`.
 * @param baseDepth Depth of the list item line.
 * @param options Resolved decoder options.
 */
function decodeObjectFromListItem(
  firstLine: ParsedLine,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): JsonObject {
  const obj: JsonObject = {}
  // Define (or redefine) an own data property without touching the prototype
  const setOwn = (name: string, fieldValue: JsonValue): void => {
    Object.defineProperty(obj, name, {
      value: fieldValue,
      writable: true,
      enumerable: true,
      configurable: true,
    })
  }

  const afterHyphen = firstLine.content.slice(LIST_ITEM_PREFIX.length)
  const { key, value, followDepth } = decodeFirstFieldOnHyphen(afterHyphen, cursor, baseDepth, options)
  setOwn(key, value)

  // Read subsequent fields
  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth !== followDepth || line.content.startsWith(LIST_ITEM_PREFIX)) {
      break
    }
    const [k, v] = decodeKeyValuePair(line, cursor, followDepth, options)
    setOwn(k, v)
  }

  return obj
}
/**
 * Decodes the first field of an object that begins on a list item line.
 *
 * @param rest Text of the list item line with the prefix removed.
 * @param cursor Cursor positioned after the list item line.
 * @param baseDepth Depth of the list item line.
 * @param options Resolved decoder options.
 * @returns The field's key and value, plus `followDepth`: the depth at which
 *   the object's remaining fields are expected (always `baseDepth + 1`).
 */
function decodeFirstFieldOnHyphen(
  rest: string,
  cursor: LineCursor,
  baseDepth: Depth,
  options: ResolvedDecodeOptions,
): { key: string, value: JsonValue, followDepth: Depth } {
  const followDepth = baseDepth + 1

  // First field is an array: decode it via a stand-in line that omits the prefix
  const parsedHeader = parseArrayHeaderLine(rest, DEFAULT_DELIMITER)
  if (parsedHeader) {
    const standInLine: ParsedLine = {
      raw: rest,
      content: rest,
      indent: baseDepth * options.indent,
      depth: baseDepth,
    }
    const items = decodeArrayFromHeader(parsedHeader.header, standInLine, cursor, baseDepth, options)
    return { key: parsedHeader.header.key!, value: items, followDepth }
  }

  // Plain field: inline primitive when text follows the colon, nested object otherwise
  const { key, end } = parseKeyToken(rest, 0)
  const remainder = rest.slice(end).trim()
  const value: JsonValue = remainder
    ? parsePrimitiveToken(remainder)
    : expectNestedObject(cursor, followDepth, options)

  return { key, value, followDepth }
}
// #endregion
// #region Validation
/**
 * Throws when strict mode is on and `actual` differs from `expected`.
 *
 * @param actual Number of items found.
 * @param expected Number of items declared.
 * @param what Label for the counted thing, inserted into the error message.
 * @param options Resolved decoder options; the check is skipped unless `strict` is set.
 * @throws {Error} On a mismatch in strict mode.
 */
function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void {
  if (!options.strict) {
    return
  }
  if (actual === expected) {
    return
  }
  throw new Error(`Expected ${expected} ${what}, but got ${actual}`)
}
// #endregion

View File

@@ -1,13 +1,19 @@
import type {
DecodeOptions,
EncodeOptions,
JsonValue,
ResolvedDecodeOptions,
ResolvedEncodeOptions,
} from './types'
import { DEFAULT_DELIMITER } from './constants'
import { decodeValueFromLines } from './decoders'
import { encodeValue } from './encoders'
import { normalizeValue } from './normalize'
import { LineCursor, toParsedLines } from './scanner'
export { DEFAULT_DELIMITER, DELIMITERS } from './constants'
export type {
DecodeOptions,
Delimiter,
DelimiterKey,
EncodeOptions,
@@ -15,6 +21,7 @@ export type {
JsonObject,
JsonPrimitive,
JsonValue,
ResolvedDecodeOptions,
ResolvedEncodeOptions,
} from './types'
@@ -24,6 +31,18 @@ export function encode(input: unknown, options?: EncodeOptions): string {
return encodeValue(normalizedValue, resolvedOptions)
}
/**
 * Decodes TOON text into a JSON value.
 *
 * @param input The text to decode.
 * @param options Optional decoder settings; missing fields use their defaults.
 * @returns The decoded value.
 * @throws {Error} When `input` produces no lines to decode.
 */
export function decode(input: string, options?: DecodeOptions): JsonValue {
  const resolvedOptions = resolveDecodeOptions(options)
  const parsedLines = toParsedLines(input, resolvedOptions.indent)

  if (parsedLines.length < 1) {
    throw new Error('Cannot decode empty input')
  }

  return decodeValueFromLines(new LineCursor(parsedLines), resolvedOptions)
}
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
return {
indent: options?.indent ?? 2,
@@ -31,3 +50,10 @@ function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
lengthMarker: options?.lengthMarker ?? false,
}
}
/**
 * Fills in decoder defaults: `indent` falls back to 2 and `strict` to true
 * whenever the caller left them null or undefined.
 */
function resolveDecodeOptions(options?: DecodeOptions): ResolvedDecodeOptions {
  const indent = options?.indent ?? 2
  const strict = options?.strict ?? true
  return { indent, strict }
}

393
src/parser.ts Normal file
View File

@@ -0,0 +1,393 @@
import type {
ArrayHeaderInfo,
Delimiter,
JsonPrimitive,
} from './types'
import {
BACKSLASH,
CARRIAGE_RETURN,
CLOSE_BRACE,
CLOSE_BRACKET,
COLON,
DELIMITERS,
DOUBLE_QUOTE,
FALSE_LITERAL,
HASH,
NEWLINE,
NULL_LITERAL,
OPEN_BRACE,
OPEN_BRACKET,
PIPE,
TAB,
TRUE_LITERAL,
} from './constants'
// #region Array header parsing
/**
 * Tries to parse `content` as an array header of the form
 * `key[<bracket>]{<fields>}: <inline values>`, where the key, the fields
 * segment and the inline values are each optional.
 *
 * The line is rejected (returns `undefined`) when:
 * - it starts with a double quote (quoted key, handled elsewhere),
 * - it has no `[`...`]` pair,
 * - a colon appears before the opening bracket: the bracket then belongs to
 *   the value of an ordinary `key: value` line (e.g. `note: "see [1]: x"`),
 *   not to a header,
 * - no colon follows the bracket/fields segments,
 * - the bracket segment does not parse.
 *
 * @param content Line content with indentation already removed.
 * @param defaultDelimiter Delimiter assumed when the bracket segment names none.
 * @returns The parsed header plus the trimmed text after the colon
 *   (`inlineValues`, omitted when empty), or `undefined` if the line is not a header.
 */
export function parseArrayHeaderLine(
  content: string,
  defaultDelimiter: Delimiter,
): { header: ArrayHeaderInfo, inlineValues?: string } | undefined {
  // Don't match if the line starts with a quote (it's a quoted key, not an array)
  if (content.trimStart().startsWith(DOUBLE_QUOTE)) {
    return undefined
  }

  const bracketStart = content.indexOf(OPEN_BRACKET)
  if (bracketStart === -1) {
    return undefined
  }

  // An unquoted key ends at the first colon, so a colon ahead of the bracket
  // means the bracket is part of a value rather than of a header
  const firstColon = content.indexOf(COLON)
  if (firstColon !== -1 && firstColon < bracketStart) {
    return undefined
  }

  const bracketEnd = content.indexOf(CLOSE_BRACKET, bracketStart)
  if (bracketEnd === -1) {
    return undefined
  }

  // Locate an optional `{...}` fields segment sitting between `]` and the colon
  const braceStart = content.indexOf(OPEN_BRACE, bracketEnd)
  let braceClose = -1
  if (braceStart !== -1 && braceStart < content.indexOf(COLON, bracketEnd)) {
    braceClose = content.indexOf(CLOSE_BRACE, braceStart)
  }

  // The header's colon is the first one after the bracket and (if present) the fields segment
  const colonSearchFrom = braceClose !== -1 ? braceClose + 1 : bracketEnd + 1
  const colonIndex = content.indexOf(COLON, colonSearchFrom)
  if (colonIndex === -1) {
    return undefined
  }

  const key = bracketStart > 0 ? content.slice(0, bracketStart) : undefined
  const afterColon = content.slice(colonIndex + 1).trim()
  const bracketContent = content.slice(bracketStart + 1, bracketEnd)

  // An unparsable bracket segment means this is not an array header
  let parsedBracket
  try {
    parsedBracket = parseBracketSegment(bracketContent, defaultDelimiter)
  }
  catch {
    return undefined
  }
  const { length, delimiter, hasLengthMarker } = parsedBracket

  // Field names are split with the delimiter declared in the bracket segment
  let fields: string[] | undefined
  if (braceClose !== -1) {
    fields = parseFieldsSegment(content.slice(braceStart + 1, braceClose), delimiter)
  }

  return {
    header: {
      key,
      length,
      delimiter,
      fields,
      hasLengthMarker,
    },
    inlineValues: afterColon || undefined,
  }
}
/**
 * Parses the text between `[` and `]` of an array header.
 *
 * Layout: an optional leading hash (length marker), the length as decimal
 * digits, and an optional trailing tab or pipe that selects the delimiter.
 *
 * The length must consist of digits only. `Number.parseInt` alone would also
 * accept inputs such as `3abc`, `-1` or `3.5`, so the digits are validated
 * before conversion.
 *
 * @param seg Bracket content, without the brackets.
 * @param defaultDelimiter Delimiter used when the segment has no delimiter suffix.
 * @returns The declared length, the effective delimiter, and whether a length marker was present.
 * @throws {TypeError} When the length part is not a non-negative decimal integer.
 */
export function parseBracketSegment(
  seg: string,
  defaultDelimiter: Delimiter,
): { length: number, delimiter: Delimiter, hasLengthMarker: boolean } {
  let content = seg

  // Optional length marker prefix
  const hasLengthMarker = content.startsWith(HASH)
  if (hasLengthMarker) {
    content = content.slice(1)
  }

  // Optional delimiter suffix
  let delimiter = defaultDelimiter
  if (content.endsWith(TAB)) {
    delimiter = DELIMITERS.tab
    content = content.slice(0, -1)
  }
  else if (content.endsWith(PIPE)) {
    delimiter = DELIMITERS.pipe
    content = content.slice(0, -1)
  }

  // Surrounding whitespace is tolerated; anything but digits is not
  const digits = content.trim()
  if (!/^\d+$/.test(digits)) {
    throw new TypeError(`Invalid array length: ${seg}`)
  }

  return { length: Number.parseInt(digits, 10), delimiter, hasLengthMarker }
}
/**
 * Parses the text between `{` and `}` of an array header into field names.
 * Each delimited entry is trimmed and unquoted if it is a quoted string.
 */
export function parseFieldsSegment(seg: string, delimiter: Delimiter): string[] {
  const names: string[] = []
  for (const rawField of splitDelimitedValues(seg, delimiter)) {
    names.push(parseStringLiteral(rawField.trim()))
  }
  return names
}
// #endregion
// #region Delimited value parsing
/**
 * Splits `input` on `delimiter`, ignoring delimiters inside double-quoted sections.
 *
 * Quotes and backslash escapes are kept verbatim in the returned pieces (no
 * unescaping happens here); each piece is trimmed. Empty input yields an empty array.
 *
 * @param input Text to split.
 * @param delimiter Single-character delimiter.
 * @returns The trimmed raw pieces, in order.
 */
export function splitDelimitedValues(input: string, delimiter: Delimiter): string[] {
  const pieces: string[] = []
  let buffer = ''
  let quoted = false

  for (let pos = 0; pos < input.length; pos++) {
    const ch = input[pos]

    if (quoted && ch === BACKSLASH && pos + 1 < input.length) {
      // Keep the escape pair intact so an escaped quote cannot end the quoted section
      buffer += ch + input[pos + 1]
      pos++
    }
    else if (ch === DOUBLE_QUOTE) {
      quoted = !quoted
      buffer += ch
    }
    else if (ch === delimiter && !quoted) {
      pieces.push(buffer.trim())
      buffer = ''
    }
    else {
      buffer += ch
    }
  }

  // Flush the final piece; also covers a trailing delimiter (empty last piece)
  if (buffer !== '' || pieces.length > 0) {
    pieces.push(buffer.trim())
  }

  return pieces
}
/**
 * Converts each raw token of a row into its primitive value, preserving order.
 */
export function parseRowValuesToPrimitives(values: string[]): JsonPrimitive[] {
  const primitives: JsonPrimitive[] = []
  for (const token of values) {
    primitives.push(parsePrimitiveToken(token))
  }
  return primitives
}
// #endregion
// #region Primitive and key parsing
/**
 * Converts a raw token into a JSON primitive.
 *
 * After trimming, the token is interpreted in this order:
 * empty → `''`; leading double quote → quoted string (must be well-formed);
 * boolean/null literal → `true` / `false` / `null`; numeric literal → number;
 * anything else → the trimmed text as a string.
 *
 * @param token Raw token text.
 * @throws {Error} When a token starting with a quote is not a valid string literal.
 */
export function parsePrimitiveToken(token: string): JsonPrimitive {
  const text = token.trim()

  if (text === '') {
    return ''
  }

  // A leading quote commits the token to being a string literal
  if (text.startsWith(DOUBLE_QUOTE)) {
    return parseStringLiteral(text)
  }

  switch (text) {
    case TRUE_LITERAL:
      return true
    case FALSE_LITERAL:
      return false
    case NULL_LITERAL:
      return null
  }

  return isNumericLiteral(text) ? Number.parseFloat(text) : text
}
/**
 * Reports whether `token` is exactly one of the true, false or null literals.
 */
export function isBooleanOrNullLiteral(token: string): boolean {
  switch (token) {
    case TRUE_LITERAL:
    case FALSE_LITERAL:
    case NULL_LITERAL:
      return true
    default:
      return false
  }
}
/**
 * Reports whether `token` should be decoded as a number.
 *
 * A token qualifies when `Number(token)` is finite and the token does not have
 * a forbidden leading zero. `0`, `0.5` and `-0.5` are fine; `05` and `-05` are
 * not, so such values stay strings. The leading-zero rule is applied after an
 * optional sign, so signed and unsigned tokens are treated alike.
 *
 * Empty and whitespace-only tokens are never numeric, even though
 * `Number(' ')` evaluates to `0`.
 *
 * @param token Candidate token.
 */
export function isNumericLiteral(token: string): boolean {
  // `Number('')` and `Number('   ')` are both 0, so rule these out explicitly
  if (!token || token.trim() === '') {
    return false
  }

  // Apply the leading-zero rule to the part after an optional sign
  const unsigned = token[0] === '-' || token[0] === '+' ? token.slice(1) : token
  if (unsigned.length > 1 && unsigned[0] === '0' && unsigned[1] !== '.') {
    return false
  }

  // Rejects NaN as well as +/-Infinity
  return Number.isFinite(Number(token))
}
/**
 * Returns the string value of `token`.
 *
 * A token that (after trimming) does not start with a double quote is returned
 * trimmed and otherwise unchanged. A quoted token must end exactly at its
 * closing quote; its content is then unescaped.
 *
 * @param token Raw token text.
 * @throws {Error} When characters follow the closing quote, when the closing
 *   quote is missing, or when the content holds an invalid escape sequence.
 */
export function parseStringLiteral(token: string): string {
  const text = token.trim()

  // Not quoted: nothing to unwrap
  if (!text.startsWith(DOUBLE_QUOTE)) {
    return text
  }

  // Scan for the first unescaped closing quote
  for (let pos = 1; pos < text.length; pos++) {
    const ch = text[pos]
    if (ch === BACKSLASH && pos + 1 < text.length) {
      // Step over the escaped character too
      pos++
      continue
    }
    if (ch !== DOUBLE_QUOTE) {
      continue
    }
    if (pos !== text.length - 1) {
      throw new Error('Unexpected characters after closing quote')
    }
    return unescapeString(text.slice(1, pos))
  }

  throw new Error('Unterminated string: missing closing quote')
}
/**
 * Resolves backslash escapes in the content of a quoted string.
 *
 * Supported escapes: `\n`, `\t`, `\r`, `\\` and `\"`.
 *
 * @param value String content without the surrounding quotes.
 * @returns The content with every escape replaced by the character it denotes.
 * @throws {Error} On a trailing lone backslash or an unsupported escape.
 */
export function unescapeString(value: string): string {
  let out = ''

  for (let pos = 0; pos < value.length; pos++) {
    const ch = value[pos]
    if (ch !== BACKSLASH) {
      out += ch
      continue
    }

    if (pos + 1 >= value.length) {
      throw new Error('Invalid escape sequence: backslash at end of string')
    }

    const code = value[pos + 1]
    switch (code) {
      case 'n':
        out += NEWLINE
        break
      case 't':
        out += TAB
        break
      case 'r':
        out += CARRIAGE_RETURN
        break
      case BACKSLASH:
        out += BACKSLASH
        break
      case DOUBLE_QUOTE:
        out += DOUBLE_QUOTE
        break
      default:
        throw new Error(`Invalid escape sequence: \\${code}`)
    }

    // The escape consumed two characters; the loop adds the second step
    pos++
  }

  return out
}
/**
 * Reads an unquoted key that runs from `start` up to the next colon.
 *
 * @param content Line content.
 * @param start Index where the key begins.
 * @returns The trimmed key and `end`, the index just past the colon.
 * @throws {Error} When no colon is found at or after `start`.
 */
export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
  const colonAt = content.indexOf(COLON, start)
  if (colonAt === -1) {
    throw new Error('Missing colon after key')
  }

  return {
    key: content.slice(start, colonAt).trim(),
    end: colonAt + 1,
  }
}
/**
 * Reads a double-quoted key whose opening quote is at `start`.
 *
 * @param content Line content.
 * @param start Index of the opening quote.
 * @returns The unescaped key and `end`, the index just past the colon that follows the closing quote.
 * @throws {Error} When the key is unterminated, holds an invalid escape, or
 *   is not immediately followed by a colon.
 */
export function parseQuotedKey(content: string, start: number): { key: string, end: number } {
  const keyStart = start + 1 // Skip opening quote

  for (let pos = keyStart; pos < content.length; pos++) {
    const ch = content[pos]
    if (ch === BACKSLASH && pos + 1 < content.length) {
      // Step over the escaped character so an escaped quote does not end the key
      pos++
      continue
    }
    if (ch !== DOUBLE_QUOTE) {
      continue
    }

    // Closing quote found: unescape first, then require the colon right after it
    const key = unescapeString(content.slice(keyStart, pos))
    const afterQuote = pos + 1
    if (afterQuote >= content.length || content[afterQuote] !== COLON) {
      throw new Error('Missing colon after key')
    }
    return { key, end: afterQuote + 1 }
  }

  throw new Error('Unterminated quoted key')
}
/**
 * Reads the key beginning at `start`, choosing the quoted or unquoted parser
 * according to whether the character at `start` is a double quote.
 *
 * @returns The key and `end`, the index just past the key's colon.
 */
export function parseKeyToken(content: string, start: number): { key: string, end: number } {
  const isQuoted = content[start] === DOUBLE_QUOTE
  return isQuoted
    ? parseQuotedKey(content, start)
    : parseUnquotedKey(content, start)
}
// #endregion
// #region Array content detection helpers
/**
 * Cheap check for a keyless array header after a list item prefix: the trimmed
 * text must start with an opening bracket and the text must contain a colon.
 */
export function isArrayHeaderAfterHyphen(content: string): boolean {
  if (!content.trim().startsWith(OPEN_BRACKET)) {
    return false
  }
  return content.includes(COLON)
}
/**
 * Reports whether the text after a list item prefix contains a colon, which
 * the decoder takes to mean an object's first field.
 */
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
  const colonAt = content.indexOf(COLON)
  return colonAt !== -1
}
// #endregion

63
src/scanner.ts Normal file
View File

@@ -0,0 +1,63 @@
import type { Depth, ParsedLine } from './types'
import { SPACE } from './constants'
/**
 * Forward-only reader over an array of parsed lines.
 *
 * The cursor keeps an index of the next unread line; `peek` reads without
 * moving, `next` reads and moves, and `advance` just moves.
 */
export class LineCursor {
  private lines: ParsedLine[]
  private index = 0

  constructor(lines: ParsedLine[]) {
    this.lines = lines
  }

  /** Total number of lines, regardless of the current position. */
  get length(): number {
    return this.lines.length
  }

  /** True once every line has been consumed. */
  atEnd(): boolean {
    return this.index >= this.lines.length
  }

  /** Returns the next unread line without consuming it, or `undefined` at the end. */
  peek(): ParsedLine | undefined {
    return this.lines[this.index]
  }

  /** Returns the next unread line and moves past it; the index moves even at the end. */
  next(): ParsedLine | undefined {
    const line = this.lines[this.index]
    this.index += 1
    return line
  }

  /** Returns the line just before the current position, or `undefined` if nothing was consumed. */
  current(): ParsedLine | undefined {
    if (this.index <= 0) {
      return undefined
    }
    return this.lines[this.index - 1]
  }

  /** Moves one line forward without returning anything. */
  advance(): void {
    this.index += 1
  }
}
/**
 * Splits `source` into lines and records each line's indentation.
 *
 * Blank and whitespace-only lines are skipped. Without this, the empty line
 * produced by a trailing newline, or a blank separator line, would reach the
 * decoder as a depth-0 line with no key and fail with a missing-colon error.
 *
 * Only leading space characters count as indentation. `depth` is derived from
 * the indentation width and `indentSize` via `computeDepthFromIndent`.
 *
 * @param source Text to scan; lines are separated by `\n`.
 * @param indentSize Number of spaces per indentation level.
 * @returns One entry per non-blank line, in source order; empty when `source`
 *   has no non-blank line.
 */
export function toParsedLines(source: string, indentSize: number): ParsedLine[] {
  const parsed: ParsedLine[] = []

  for (const raw of source.split('\n')) {
    // Skip lines that carry no content (also covers a lone carriage return)
    if (raw.trim() === '') {
      continue
    }

    // Count leading spaces
    let indent = 0
    while (indent < raw.length && raw[indent] === SPACE) {
      indent++
    }

    const content = raw.slice(indent)
    const depth = computeDepthFromIndent(indent, indentSize)
    parsed.push({ raw, indent, content, depth })
  }

  return parsed
}
/**
 * Converts an indentation width into a nesting depth: the number of whole
 * `indentSize` steps that fit into `indentSpaces`.
 */
function computeDepthFromIndent(indentSpaces: number, indentSize: number): Depth {
  const levels = indentSpaces / indentSize
  return Math.floor(levels)
}

View File

@@ -36,4 +36,42 @@ export type ResolvedEncodeOptions = Readonly<Required<EncodeOptions>>
// #endregion
// #region Decoder options
/**
 * Caller-supplied settings for `decode`. Every field is optional; omitted
 * fields fall back to the defaults noted on each field.
 */
export interface DecodeOptions {
/**
* Number of spaces per indentation level.
* A line's depth is its leading-space count divided by this value, rounded down.
* @default 2
*/
indent?: number
/**
* When true, enforce strict validation of array lengths and tabular row counts:
* count mismatches and surplus items or rows raise an error.
* @default true
*/
strict?: boolean
}
/** `DecodeOptions` with every field required and read-only. */
export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>
// #endregion
// #region Decoder parsing types
/** Information extracted from an array header line by the header parser. */
export interface ArrayHeaderInfo {
// Text preceding the opening bracket; absent when the header starts with the bracket
key?: string
// Declared number of items, parsed from the bracket segment
length: number
// Delimiter named by the bracket segment's suffix, or the parser's default when none is given
delimiter: Delimiter
// Field names from the `{...}` segment; absent when the header has no such segment
fields?: string[]
// True when the bracket segment begins with the length-marker character
hasLengthMarker: boolean
}
/** One source line together with its indentation details, as produced by the line scanner. */
export interface ParsedLine {
// The line exactly as it appeared in the source, indentation included
raw: string
// Nesting level: `indent` divided by the configured indent size, rounded down
depth: Depth
// Number of leading space characters
indent: number
// The line with its leading spaces removed
content: string
}
// #endregion
/** Nesting level of a line, counted in whole indentation steps (0 = top level). */
export type Depth = number