diff --git a/src/decoders.ts b/src/decode/decoders.ts similarity index 75% rename from src/decoders.ts rename to src/decode/decoders.ts index 17eeac1..e752e03 100644 --- a/src/decoders.ts +++ b/src/decode/decoders.ts @@ -1,6 +1,6 @@ -import type { LineCursor } from './scanner' import type { ArrayHeaderInfo, + Delimiter, Depth, JsonArray, JsonObject, @@ -8,12 +8,13 @@ import type { JsonValue, ParsedLine, ResolvedDecodeOptions, -} from './types' +} from '../types' +import type { LineCursor } from './scanner' import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX, -} from './constants' +} from '../constants' import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, @@ -23,6 +24,12 @@ import { parseKeyToken, parsePrimitiveToken, } from './parser' +import { findClosingQuote } from './string-utils' +import { + assertExpectedCount, + validateNoExtraListItems, + validateNoExtraTabularRows, +} from './validation' // #region Entry decoding @@ -33,7 +40,7 @@ export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecode } // Check for root array - if (isRootArrayHeaderLine(first)) { + if (isArrayHeaderAfterHyphen(first.content)) { const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER) if (headerInfo) { cursor.advance() // Move past the header line @@ -50,28 +57,17 @@ export function decodeValueFromLines(cursor: LineCursor, options: ResolvedDecode return decodeObject(cursor, 0, options) } -function isRootArrayHeaderLine(line: ParsedLine): boolean { - return isArrayHeaderAfterHyphen(line.content) -} - function isKeyValueLine(line: ParsedLine): boolean { const content = line.content // Look for unquoted colon or quoted key followed by colon if (content.startsWith('"')) { - // Quoted key - let i = 1 - while (i < content.length) { - if (content[i] === '\\' && i + 1 < content.length) { - i += 2 - continue - } - if (content[i] === '"') { - // Found end of quoted key, check for colon - return content[i + 1] === COLON - } - i++ + // Quoted key - find the closing quote + const closingQuoteIndex = findClosingQuote(content, 0) + if (closingQuoteIndex === -1) { + return false } - return false + // Check if there's a colon after the quoted key + return closingQuoteIndex + 1 < content.length && content[closingQuoteIndex + 1] === COLON } else { // Unquoted key - look for first colon not inside quotes @@ -227,11 +223,8 @@ function decodeListArray( assertExpectedCount(items.length, header.length, 'list array items', options) // In strict mode, check for extra items - if (options.strict && !cursor.atEnd()) { - const nextLine = cursor.peek() - if (nextLine && nextLine.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) { - throw new RangeError(`Expected ${header.length} list array items, but found more`) - } + if (options.strict) { + validateNoExtraListItems(cursor, itemDepth, header.length) } return items @@ -274,30 +267,8 @@ function decodeTabularArray( assertExpectedCount(objects.length, header.length, 'tabular rows', options) // In strict mode, check for extra rows - if (options.strict && !cursor.atEnd()) { - const nextLine = cursor.peek() - if (nextLine && nextLine.depth === rowDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) { - // A key-value pair has a colon (and if it has delimiter, colon comes first) - // A data row either has no colon, or has delimiter before colon - const hasColon = nextLine.content.includes(COLON) - const hasDelimiter = nextLine.content.includes(header.delimiter) - - if (!hasColon) { - // No colon = data row (for single-field tables) - throw new RangeError(`Expected ${header.length} tabular rows, but found more`) - } - else if (hasDelimiter) { - // Has both colon and delimiter - check which comes first - const colonPos = nextLine.content.indexOf(COLON) - const delimiterPos = nextLine.content.indexOf(header.delimiter) - if (delimiterPos < colonPos) { - // Delimiter before colon = data row - throw new RangeError(`Expected ${header.length} tabular rows, but found more`) - } - // Colon before delimiter = key-value pair, OK - } - // Has colon but no delimiter = key-value pair, OK - } + if (options.strict) { + validateNoExtraTabularRows(cursor, rowDepth, header) } return objects @@ -310,7 +281,7 @@ function decodeTabularArray( function decodeListItem( cursor: LineCursor, baseDepth: Depth, - activeDelimiter: string, + activeDelimiter: Delimiter, options: ResolvedDecodeOptions, ): JsonValue { const line = cursor.next() @@ -322,7 +293,7 @@ function decodeListItem( // Check for array header after hyphen if (isArrayHeaderAfterHyphen(afterHyphen)) { - const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter as any) + const arrayHeader = parseArrayHeaderLine(afterHyphen, activeDelimiter) if (arrayHeader) { return decodeArrayFromHeader(arrayHeader.header, arrayHeader.inlineValues, cursor, baseDepth, options) } @@ -344,7 +315,7 @@ function decodeObjectFromListItem( options: ResolvedDecodeOptions, ): JsonObject { const afterHyphen = firstLine.content.slice(LIST_ITEM_PREFIX.length) - const { key, value, followDepth } = decodeFirstFieldOnHyphen(afterHyphen, cursor, baseDepth, options) + const { key, value, followDepth } = decodeKeyValue(afterHyphen, cursor, baseDepth, options) const obj: JsonObject = { [key]: value } @@ -367,23 +338,4 @@ function decodeObjectFromListItem( return obj } -function decodeFirstFieldOnHyphen( - rest: string, - cursor: LineCursor, - baseDepth: Depth, - options: ResolvedDecodeOptions, -): { key: string, value: JsonValue, followDepth: Depth } { - return decodeKeyValue(rest, cursor, baseDepth, options) -} - -// #endregion - -// #region Validation - -function assertExpectedCount(actual: number, expected: number, what: string, options: ResolvedDecodeOptions): void { - if (options.strict && actual !== expected) { - throw new RangeError(`Expected ${expected} ${what}, but got ${actual}`) - } -} - // #endregion diff --git a/src/parser.ts b/src/decode/parser.ts similarity index 84% rename from src/parser.ts rename to src/decode/parser.ts index 39717e9..d678e7e 100644 --- a/src/parser.ts +++ b/src/decode/parser.ts @@ -2,7 +2,7 @@ import type { ArrayHeaderInfo, Delimiter, JsonPrimitive, -} from './types' +} from '../types' import { BACKSLASH, CARRIAGE_RETURN, @@ -20,7 +20,8 @@ import { PIPE, TAB, TRUE_LITERAL, -} from './constants' +} from '../constants' +import { findClosingQuote, hasUnquotedChar } from './string-utils' // #region Array header parsing @@ -246,26 +247,19 @@ export function parseStringLiteral(token: string): string { if (trimmed.startsWith(DOUBLE_QUOTE)) { // Find the closing quote, accounting for escaped quotes - let i = 1 - while (i < trimmed.length) { - if (trimmed[i] === BACKSLASH && i + 1 < trimmed.length) { - // Skip escaped character - i += 2 - continue - } - if (trimmed[i] === DOUBLE_QUOTE) { - // Found closing quote - if (i !== trimmed.length - 1) { - throw new SyntaxError('Unexpected characters after closing quote') - } - const content = trimmed.slice(1, i) - return unescapeString(content) - } - i++ + const closingQuoteIndex = findClosingQuote(trimmed, 0) + + if (closingQuoteIndex === -1) { + // No closing quote was found + throw new SyntaxError('Unterminated string: missing closing quote') } - // If we get here, no closing quote was found - throw new SyntaxError('Unterminated string: missing closing quote') + if (closingQuoteIndex !== trimmed.length - 1) { + throw new SyntaxError('Unexpected characters after closing quote') + } + + const content = trimmed.slice(1, closingQuoteIndex) + return unescapeString(content) } return trimmed @@ -338,35 +332,25 @@ export function parseUnquotedKey(content: string, start: number): { key: string, } export function parseQuotedKey(content: string, start: number): { key: string, end: number } { - let i = start + 1 // Skip opening quote - let keyContent = '' + // Find the closing quote, accounting for escaped quotes + const closingQuoteIndex = findClosingQuote(content, start) - while (i < content.length) { - if (content[i] === BACKSLASH && i + 1 < content.length) { - keyContent += content[i]! + content[i + 1] - i += 2 - continue - } - - if (content[i] === DOUBLE_QUOTE) { - // Found closing quote - const key = unescapeString(keyContent) - let end = i + 1 - - // Validate and skip colon after quoted key - if (end >= content.length || content[end] !== COLON) { - throw new SyntaxError('Missing colon after key') - } - end++ - - return { key, end } - } - - keyContent += content[i] - i++ + if (closingQuoteIndex === -1) { + throw new SyntaxError('Unterminated quoted key') } - throw new SyntaxError('Unterminated quoted key') + // Extract and unescape the key content + const keyContent = content.slice(start + 1, closingQuoteIndex) + const key = unescapeString(keyContent) + let end = closingQuoteIndex + 1 + + // Validate and skip colon after quoted key + if (end >= content.length || content[end] !== COLON) { + throw new SyntaxError('Missing colon after key') + } + end++ + + return { key, end } } export function parseKeyToken(content: string, start: number): { key: string, end: number } { @@ -383,11 +367,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en // #region Array content detection helpers export function isArrayHeaderAfterHyphen(content: string): boolean { - return content.trim().startsWith(OPEN_BRACKET) && content.includes(COLON) + return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON) } export function isObjectFirstFieldAfterHyphen(content: string): boolean { - return content.includes(COLON) + return hasUnquotedChar(content, COLON) } // #endregion diff --git a/src/scanner.ts b/src/decode/scanner.ts similarity index 73% rename from src/scanner.ts rename to src/decode/scanner.ts index 34b6650..831eb20 100644 --- a/src/scanner.ts +++ b/src/decode/scanner.ts @@ -1,5 +1,5 @@ -import type { Depth, ParsedLine } from './types' -import { SPACE } from './constants' +import type { Depth, ParsedLine } from '../types' +import { SPACE } from '../constants' export class LineCursor { private lines: ParsedLine[] @@ -33,6 +33,21 @@ export class LineCursor { get length(): number { return this.lines.length } + + peekAtDepth(targetDepth: Depth): ParsedLine | undefined { + const line = this.peek() + if (!line || line.depth < targetDepth) { + return undefined + } + if (line.depth === targetDepth) { + return line + } + return undefined + } + + hasMoreAtDepth(targetDepth: Depth): boolean { + return this.peekAtDepth(targetDepth) !== undefined + } } export function toParsedLines(source: string, indentSize: number): ParsedLine[] { diff --git a/src/decode/string-utils.ts b/src/decode/string-utils.ts new file mode 100644 index 0000000..7746cb5 --- /dev/null +++ b/src/decode/string-utils.ts @@ -0,0 +1,96 @@ +import { BACKSLASH, DOUBLE_QUOTE } from '../constants' + +/** + * Finds the index of the closing double quote in a string, accounting for escape sequences. + * + * @param content The string to search in + * @param start The index of the opening quote + * @returns The index of the closing quote, or -1 if not found + */ +export function findClosingQuote(content: string, start: number): number { + let i = start + 1 + while (i < content.length) { + if (content[i] === BACKSLASH && i + 1 < content.length) { + // Skip escaped character + i += 2 + continue + } + if (content[i] === DOUBLE_QUOTE) { + return i + } + i++ + } + return -1 // Not found +} + +/** + * Checks if a string contains a specific character outside of quoted sections. + * + * @param content The string to check + * @param char The character to look for + * @returns true if the character exists outside quotes, false otherwise + */ +export function hasUnquotedChar(content: string, char: string): boolean { + return findUnquotedChar(content, char) !== -1 +} + +/** + * Finds the index of a specific character outside of quoted sections. + * + * @param content The string to search in + * @param char The character to look for + * @param start Optional starting index (defaults to 0) + * @returns The index of the character, or -1 if not found outside quotes + */ +export function findUnquotedChar(content: string, char: string, start = 0): number { + let inQuotes = false + let i = start + + while (i < content.length) { + if (content[i] === BACKSLASH && i + 1 < content.length && inQuotes) { + // Skip escaped character + i += 2 + continue + } + + if (content[i] === DOUBLE_QUOTE) { + inQuotes = !inQuotes + i++ + continue + } + + if (content[i] === char && !inQuotes) { + return i + } + + i++ + } + + return -1 +} + +/** + * Checks if a string starts and ends with double quotes. + * + * @param content The string to check + * @returns true if the string is quoted, false otherwise + */ +export function isQuotedString(content: string): boolean { + const trimmed = content.trim() + return trimmed.startsWith(DOUBLE_QUOTE) && trimmed.endsWith(DOUBLE_QUOTE) && trimmed.length >= 2 +} + +/** + * Skips whitespace characters starting from a given index. + * + * @param content The string to process + * @param start The starting index + * @returns The index of the first non-whitespace character, or content.length if all whitespace + */ +export function skipWhitespace(content: string, start: number): number { + let i = start + while (i < content.length && /\s/.test(content[i]!)) { + i++ + } + return i +} diff --git a/src/decode/validation.ts b/src/decode/validation.ts new file mode 100644 index 0000000..0a35b7b --- /dev/null +++ b/src/decode/validation.ts @@ -0,0 +1,97 @@ +import type { ArrayHeaderInfo, Delimiter, Depth, ResolvedDecodeOptions } from '../types' +import type { LineCursor } from './scanner' +import { COLON, LIST_ITEM_PREFIX } from '../constants' + +/** + * Asserts that the actual count matches the expected count in strict mode. + * + * @param actual The actual count + * @param expected The expected count + * @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows') + * @param options Decode options + * @throws RangeError if counts don't match in strict mode + */ +export function assertExpectedCount( + actual: number, + expected: number, + itemType: string, + options: ResolvedDecodeOptions, +): void { + if (options.strict && actual !== expected) { + throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`) + } +} + +/** + * Validates that there are no extra list items beyond the expected count. + * + * @param cursor The line cursor + * @param itemDepth The expected depth of items + * @param expectedCount The expected number of items + * @throws RangeError if extra items are found + */ +export function validateNoExtraListItems( + cursor: LineCursor, + itemDepth: Depth, + expectedCount: number, +): void { + if (cursor.atEnd()) + return + + const nextLine = cursor.peek() + if (nextLine && nextLine.depth === itemDepth && nextLine.content.startsWith(LIST_ITEM_PREFIX)) { + throw new RangeError(`Expected ${expectedCount} list array items, but found more`) + } +} + +/** + * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array. + * + * @param content The line content + * @param delimiter The delimiter used in the table + * @returns true if the line is a data row, false if it's a key-value pair + */ +export function isDataRow(content: string, delimiter: Delimiter): boolean { + const colonPos = content.indexOf(COLON) + const delimiterPos = content.indexOf(delimiter) + + // No colon = definitely a data row + if (colonPos === -1) { + return true + } + + // Has delimiter and it comes before colon = data row + if (delimiterPos !== -1 && delimiterPos < colonPos) { + return true + } + + // Colon before delimiter or no delimiter = key-value pair + return false +} + +/** + * Validates that there are no extra tabular rows beyond the expected count. + * + * @param cursor The line cursor + * @param rowDepth The expected depth of rows + * @param header The array header info containing length and delimiter + * @throws RangeError if extra rows are found + */ +export function validateNoExtraTabularRows( + cursor: LineCursor, + rowDepth: Depth, + header: ArrayHeaderInfo, +): void { + if (cursor.atEnd()) + return + + const nextLine = cursor.peek() + if ( + nextLine + && nextLine.depth === rowDepth + && !nextLine.content.startsWith(LIST_ITEM_PREFIX) + && isDataRow(nextLine.content, header.delimiter) + ) { + throw new RangeError(`Expected ${header.length} tabular rows, but found more`) + } +} diff --git a/src/index.ts b/src/index.ts index a18cb51..f08f5ad 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,10 +6,10 @@ import type { ResolvedEncodeOptions, } from './types' import { DEFAULT_DELIMITER } from './constants' -import { decodeValueFromLines } from './decoders' +import { decodeValueFromLines } from './decode/decoders' import { encodeValue } from './encoders' import { normalizeValue } from './normalize' -import { LineCursor, toParsedLines } from './scanner' +import { LineCursor, toParsedLines } from './decode/scanner' export { DEFAULT_DELIMITER, DELIMITERS } from './constants' export type {