mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
test: add case for unquoted invalid numeric formats as strings
This commit is contained in:
@@ -1,35 +1,9 @@
|
|||||||
import type {
|
import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
|
||||||
ArrayHeaderInfo,
|
|
||||||
Delimiter,
|
|
||||||
Depth,
|
|
||||||
JsonArray,
|
|
||||||
JsonObject,
|
|
||||||
JsonPrimitive,
|
|
||||||
JsonValue,
|
|
||||||
ParsedLine,
|
|
||||||
ResolvedDecodeOptions,
|
|
||||||
} from '../types'
|
|
||||||
import type { LineCursor } from './scanner'
|
import type { LineCursor } from './scanner'
|
||||||
import {
|
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
|
||||||
COLON,
|
import { findClosingQuote } from '../shared/string-utils'
|
||||||
DEFAULT_DELIMITER,
|
import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
|
||||||
LIST_ITEM_PREFIX,
|
import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
|
||||||
} from '../constants'
|
|
||||||
import {
|
|
||||||
isArrayHeaderAfterHyphen,
|
|
||||||
isObjectFirstFieldAfterHyphen,
|
|
||||||
mapRowValuesToPrimitives,
|
|
||||||
parseArrayHeaderLine,
|
|
||||||
parseDelimitedValues,
|
|
||||||
parseKeyToken,
|
|
||||||
parsePrimitiveToken,
|
|
||||||
} from './parser'
|
|
||||||
import { findClosingQuote } from './utils'
|
|
||||||
import {
|
|
||||||
assertExpectedCount,
|
|
||||||
validateNoExtraListItems,
|
|
||||||
validateNoExtraTabularRows,
|
|
||||||
} from './validation'
|
|
||||||
|
|
||||||
// #region Entry decoding
|
// #region Entry decoding
|
||||||
|
|
||||||
|
|||||||
@@ -1,27 +1,7 @@
|
|||||||
import type {
|
import type { ArrayHeaderInfo, Delimiter, JsonPrimitive } from '../types'
|
||||||
ArrayHeaderInfo,
|
import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, HASH, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
|
||||||
Delimiter,
|
import { isBooleanOrNullLiteral, isNumericLiteral } from '../shared/literal-utils'
|
||||||
JsonPrimitive,
|
import { findClosingQuote, findUnquotedChar, unescapeString } from '../shared/string-utils'
|
||||||
} from '../types'
|
|
||||||
import {
|
|
||||||
BACKSLASH,
|
|
||||||
CARRIAGE_RETURN,
|
|
||||||
CLOSE_BRACE,
|
|
||||||
CLOSE_BRACKET,
|
|
||||||
COLON,
|
|
||||||
DELIMITERS,
|
|
||||||
DOUBLE_QUOTE,
|
|
||||||
FALSE_LITERAL,
|
|
||||||
HASH,
|
|
||||||
NEWLINE,
|
|
||||||
NULL_LITERAL,
|
|
||||||
OPEN_BRACE,
|
|
||||||
OPEN_BRACKET,
|
|
||||||
PIPE,
|
|
||||||
TAB,
|
|
||||||
TRUE_LITERAL,
|
|
||||||
} from '../constants'
|
|
||||||
import { findClosingQuote, hasUnquotedChar } from './utils'
|
|
||||||
|
|
||||||
// #region Array header parsing
|
// #region Array header parsing
|
||||||
|
|
||||||
@@ -224,24 +204,6 @@ export function parsePrimitiveToken(token: string): JsonPrimitive {
|
|||||||
return trimmed
|
return trimmed
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isBooleanOrNullLiteral(token: string): boolean {
|
|
||||||
return token === TRUE_LITERAL || token === FALSE_LITERAL || token === NULL_LITERAL
|
|
||||||
}
|
|
||||||
|
|
||||||
export function isNumericLiteral(token: string): boolean {
|
|
||||||
if (!token)
|
|
||||||
return false
|
|
||||||
|
|
||||||
// Must not have leading zeros (except for "0" itself or decimals like "0.5")
|
|
||||||
if (token.length > 1 && token[0] === '0' && token[1] !== '.') {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if it's a valid number
|
|
||||||
const num = Number(token)
|
|
||||||
return !Number.isNaN(num) && Number.isFinite(num)
|
|
||||||
}
|
|
||||||
|
|
||||||
export function parseStringLiteral(token: string): string {
|
export function parseStringLiteral(token: string): string {
|
||||||
const trimmed = token.trim()
|
const trimmed = token.trim()
|
||||||
|
|
||||||
@@ -265,53 +227,6 @@ export function parseStringLiteral(token: string): string {
|
|||||||
return trimmed
|
return trimmed
|
||||||
}
|
}
|
||||||
|
|
||||||
export function unescapeString(value: string): string {
|
|
||||||
let result = ''
|
|
||||||
let i = 0
|
|
||||||
|
|
||||||
while (i < value.length) {
|
|
||||||
if (value[i] === BACKSLASH) {
|
|
||||||
if (i + 1 >= value.length) {
|
|
||||||
throw new SyntaxError('Invalid escape sequence: backslash at end of string')
|
|
||||||
}
|
|
||||||
|
|
||||||
const next = value[i + 1]
|
|
||||||
if (next === 'n') {
|
|
||||||
result += NEWLINE
|
|
||||||
i += 2
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if (next === 't') {
|
|
||||||
result += TAB
|
|
||||||
i += 2
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if (next === 'r') {
|
|
||||||
result += CARRIAGE_RETURN
|
|
||||||
i += 2
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if (next === BACKSLASH) {
|
|
||||||
result += BACKSLASH
|
|
||||||
i += 2
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if (next === DOUBLE_QUOTE) {
|
|
||||||
result += DOUBLE_QUOTE
|
|
||||||
i += 2
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new SyntaxError(`Invalid escape sequence: \\${next}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
result += value[i]
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
|
export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
|
||||||
let end = start
|
let end = start
|
||||||
while (end < content.length && content[end] !== COLON) {
|
while (end < content.length && content[end] !== COLON) {
|
||||||
@@ -367,11 +282,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
|
|||||||
// #region Array content detection helpers
|
// #region Array content detection helpers
|
||||||
|
|
||||||
export function isArrayHeaderAfterHyphen(content: string): boolean {
|
export function isArrayHeaderAfterHyphen(content: string): boolean {
|
||||||
return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON)
|
return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
|
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
|
||||||
return hasUnquotedChar(content, COLON)
|
return findUnquotedChar(content, COLON) !== -1
|
||||||
}
|
}
|
||||||
|
|
||||||
// #endregion
|
// #endregion
|
||||||
|
|||||||
@@ -1,96 +0,0 @@
|
|||||||
import { BACKSLASH, DOUBLE_QUOTE } from '../constants'
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Finds the index of the closing double quote in a string, accounting for escape sequences.
|
|
||||||
*
|
|
||||||
* @param content The string to search in
|
|
||||||
* @param start The index of the opening quote
|
|
||||||
* @returns The index of the closing quote, or -1 if not found
|
|
||||||
*/
|
|
||||||
export function findClosingQuote(content: string, start: number): number {
|
|
||||||
let i = start + 1
|
|
||||||
while (i < content.length) {
|
|
||||||
if (content[i] === BACKSLASH && i + 1 < content.length) {
|
|
||||||
// Skip escaped character
|
|
||||||
i += 2
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if (content[i] === DOUBLE_QUOTE) {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
return -1 // Not found
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if a string contains a specific character outside of quoted sections.
|
|
||||||
*
|
|
||||||
* @param content The string to check
|
|
||||||
* @param char The character to look for
|
|
||||||
* @returns true if the character exists outside quotes, false otherwise
|
|
||||||
*/
|
|
||||||
export function hasUnquotedChar(content: string, char: string): boolean {
|
|
||||||
return findUnquotedChar(content, char) !== -1
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Finds the index of a specific character outside of quoted sections.
|
|
||||||
*
|
|
||||||
* @param content The string to search in
|
|
||||||
* @param char The character to look for
|
|
||||||
* @param start Optional starting index (defaults to 0)
|
|
||||||
* @returns The index of the character, or -1 if not found outside quotes
|
|
||||||
*/
|
|
||||||
export function findUnquotedChar(content: string, char: string, start = 0): number {
|
|
||||||
let inQuotes = false
|
|
||||||
let i = start
|
|
||||||
|
|
||||||
while (i < content.length) {
|
|
||||||
if (content[i] === BACKSLASH && i + 1 < content.length && inQuotes) {
|
|
||||||
// Skip escaped character
|
|
||||||
i += 2
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if (content[i] === DOUBLE_QUOTE) {
|
|
||||||
inQuotes = !inQuotes
|
|
||||||
i++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if (content[i] === char && !inQuotes) {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if a string starts and ends with double quotes.
|
|
||||||
*
|
|
||||||
* @param content The string to check
|
|
||||||
* @returns true if the string is quoted, false otherwise
|
|
||||||
*/
|
|
||||||
export function isQuotedString(content: string): boolean {
|
|
||||||
const trimmed = content.trim()
|
|
||||||
return trimmed.startsWith(DOUBLE_QUOTE) && trimmed.endsWith(DOUBLE_QUOTE) && trimmed.length >= 2
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Skips whitespace characters starting from a given index.
|
|
||||||
*
|
|
||||||
* @param content The string to process
|
|
||||||
* @param start The starting index
|
|
||||||
* @returns The index of the first non-whitespace character, or content.length if all whitespace
|
|
||||||
*/
|
|
||||||
export function skipWhitespace(content: string, start: number): number {
|
|
||||||
let i = start
|
|
||||||
while (i < content.length && /\s/.test(content[i]!)) {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
@@ -7,7 +7,7 @@ import { COLON, LIST_ITEM_PREFIX } from '../constants'
|
|||||||
*
|
*
|
||||||
* @param actual The actual count
|
* @param actual The actual count
|
||||||
* @param expected The expected count
|
* @param expected The expected count
|
||||||
* @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows')
|
* @param itemType The type of items being counted (e.g., `list array items`, `tabular rows`)
|
||||||
* @param options Decode options
|
* @param options Decode options
|
||||||
* @throws RangeError if counts don't match in strict mode
|
* @throws RangeError if counts don't match in strict mode
|
||||||
*/
|
*/
|
||||||
@@ -44,31 +44,6 @@ export function validateNoExtraListItems(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
|
|
||||||
*
|
|
||||||
* @param content The line content
|
|
||||||
* @param delimiter The delimiter used in the table
|
|
||||||
* @returns true if the line is a data row, false if it's a key-value pair
|
|
||||||
*/
|
|
||||||
export function isDataRow(content: string, delimiter: Delimiter): boolean {
|
|
||||||
const colonPos = content.indexOf(COLON)
|
|
||||||
const delimiterPos = content.indexOf(delimiter)
|
|
||||||
|
|
||||||
// No colon = definitely a data row
|
|
||||||
if (colonPos === -1) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Has delimiter and it comes before colon = data row
|
|
||||||
if (delimiterPos !== -1 && delimiterPos < colonPos) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Colon before delimiter or no delimiter = key-value pair
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates that there are no extra tabular rows beyond the expected count.
|
* Validates that there are no extra tabular rows beyond the expected count.
|
||||||
*
|
*
|
||||||
@@ -95,3 +70,28 @@ export function validateNoExtraTabularRows(
|
|||||||
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
|
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
|
||||||
|
*
|
||||||
|
* @param content The line content
|
||||||
|
* @param delimiter The delimiter used in the table
|
||||||
|
* @returns true if the line is a data row, false if it's a key-value pair
|
||||||
|
*/
|
||||||
|
function isDataRow(content: string, delimiter: Delimiter): boolean {
|
||||||
|
const colonPos = content.indexOf(COLON)
|
||||||
|
const delimiterPos = content.indexOf(delimiter)
|
||||||
|
|
||||||
|
// No colon = definitely a data row
|
||||||
|
if (colonPos === -1) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Has delimiter and it comes before colon = data row
|
||||||
|
if (delimiterPos !== -1 && delimiterPos < colonPos) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Colon before delimiter or no delimiter = key-value pair
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,26 +1,7 @@
|
|||||||
import type {
|
import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types'
|
||||||
Depth,
|
|
||||||
JsonArray,
|
|
||||||
JsonObject,
|
|
||||||
JsonPrimitive,
|
|
||||||
JsonValue,
|
|
||||||
ResolvedEncodeOptions,
|
|
||||||
} from '../types'
|
|
||||||
import { LIST_ITEM_MARKER } from '../constants'
|
import { LIST_ITEM_MARKER } from '../constants'
|
||||||
import {
|
import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize'
|
||||||
isArrayOfArrays,
|
import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives'
|
||||||
isArrayOfObjects,
|
|
||||||
isArrayOfPrimitives,
|
|
||||||
isJsonArray,
|
|
||||||
isJsonObject,
|
|
||||||
isJsonPrimitive,
|
|
||||||
} from './normalize'
|
|
||||||
import {
|
|
||||||
encodeAndJoinPrimitives,
|
|
||||||
encodeKey,
|
|
||||||
encodePrimitive,
|
|
||||||
formatHeader,
|
|
||||||
} from './primitives'
|
|
||||||
import { LineWriter } from './writer'
|
import { LineWriter } from './writer'
|
||||||
|
|
||||||
// #region Encode normalized JsonValue
|
// #region Encode normalized JsonValue
|
||||||
|
|||||||
@@ -1,9 +1,4 @@
|
|||||||
import type {
|
import type { JsonArray, JsonObject, JsonPrimitive, JsonValue } from '../types'
|
||||||
JsonArray,
|
|
||||||
JsonObject,
|
|
||||||
JsonPrimitive,
|
|
||||||
JsonValue,
|
|
||||||
} from '../types'
|
|
||||||
|
|
||||||
// #region Normalization (unknown → JsonValue)
|
// #region Normalization (unknown → JsonValue)
|
||||||
|
|
||||||
|
|||||||
@@ -1,14 +1,7 @@
|
|||||||
import type { JsonPrimitive } from '../types'
|
import type { JsonPrimitive } from '../types'
|
||||||
import {
|
import { COMMA, DEFAULT_DELIMITER, DOUBLE_QUOTE, NULL_LITERAL } from '../constants'
|
||||||
BACKSLASH,
|
import { escapeString } from '../shared/string-utils'
|
||||||
COMMA,
|
import { isSafeUnquoted, isValidUnquotedKey } from '../shared/validation'
|
||||||
DEFAULT_DELIMITER,
|
|
||||||
DOUBLE_QUOTE,
|
|
||||||
FALSE_LITERAL,
|
|
||||||
LIST_ITEM_MARKER,
|
|
||||||
NULL_LITERAL,
|
|
||||||
TRUE_LITERAL,
|
|
||||||
} from '../constants'
|
|
||||||
|
|
||||||
// #region Primitive encoding
|
// #region Primitive encoding
|
||||||
|
|
||||||
@@ -36,74 +29,6 @@ export function encodeStringLiteral(value: string, delimiter: string = COMMA): s
|
|||||||
return `${DOUBLE_QUOTE}${escapeString(value)}${DOUBLE_QUOTE}`
|
return `${DOUBLE_QUOTE}${escapeString(value)}${DOUBLE_QUOTE}`
|
||||||
}
|
}
|
||||||
|
|
||||||
export function escapeString(value: string): string {
|
|
||||||
return value
|
|
||||||
.replace(/\\/g, `${BACKSLASH}${BACKSLASH}`)
|
|
||||||
.replace(/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`)
|
|
||||||
.replace(/\n/g, `${BACKSLASH}n`)
|
|
||||||
.replace(/\r/g, `${BACKSLASH}r`)
|
|
||||||
.replace(/\t/g, `${BACKSLASH}t`)
|
|
||||||
}
|
|
||||||
|
|
||||||
export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
|
|
||||||
if (!value) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isPaddedWithWhitespace(value)) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if (value === TRUE_LITERAL || value === FALSE_LITERAL || value === NULL_LITERAL) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isNumericLike(value)) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for colon (always structural)
|
|
||||||
if (value.includes(':')) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for quotes and backslash (always need escaping)
|
|
||||||
if (value.includes('"') || value.includes('\\')) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for brackets and braces (always structural)
|
|
||||||
if (/[[\]{}]/.test(value)) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for control characters (newline, carriage return, tab - always need quoting/escaping)
|
|
||||||
if (/[\n\r\t]/.test(value)) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for the active delimiter
|
|
||||||
if (value.includes(delimiter)) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for hyphen at start (list marker)
|
|
||||||
if (value.startsWith(LIST_ITEM_MARKER)) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
function isNumericLike(value: string): boolean {
|
|
||||||
// Match numbers like: 42, -3.14, 1e-6, 05, etc.
|
|
||||||
return /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i.test(value) || /^0\d+$/.test(value)
|
|
||||||
}
|
|
||||||
|
|
||||||
function isPaddedWithWhitespace(value: string): boolean {
|
|
||||||
return value !== value.trim()
|
|
||||||
}
|
|
||||||
|
|
||||||
// #endregion
|
// #endregion
|
||||||
|
|
||||||
// #region Key encoding
|
// #region Key encoding
|
||||||
@@ -116,10 +41,6 @@ export function encodeKey(key: string): string {
|
|||||||
return `${DOUBLE_QUOTE}${escapeString(key)}${DOUBLE_QUOTE}`
|
return `${DOUBLE_QUOTE}${escapeString(key)}${DOUBLE_QUOTE}`
|
||||||
}
|
}
|
||||||
|
|
||||||
function isValidUnquotedKey(key: string): boolean {
|
|
||||||
return /^[A-Z_][\w.]*$/i.test(key)
|
|
||||||
}
|
|
||||||
|
|
||||||
// #endregion
|
// #endregion
|
||||||
|
|
||||||
// #region Value joining
|
// #region Value joining
|
||||||
@@ -132,9 +53,6 @@ export function encodeAndJoinPrimitives(values: readonly JsonPrimitive[], delimi
|
|||||||
|
|
||||||
// #region Header formatters
|
// #region Header formatters
|
||||||
|
|
||||||
/**
|
|
||||||
* Header formatter for arrays and tables with optional key prefix and field names
|
|
||||||
*/
|
|
||||||
export function formatHeader(
|
export function formatHeader(
|
||||||
length: number,
|
length: number,
|
||||||
options?: {
|
options?: {
|
||||||
|
|||||||
@@ -1,10 +1,4 @@
|
|||||||
import type {
|
import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
|
||||||
DecodeOptions,
|
|
||||||
EncodeOptions,
|
|
||||||
JsonValue,
|
|
||||||
ResolvedDecodeOptions,
|
|
||||||
ResolvedEncodeOptions,
|
|
||||||
} from './types'
|
|
||||||
import { DEFAULT_DELIMITER } from './constants'
|
import { DEFAULT_DELIMITER } from './constants'
|
||||||
import { decodeValueFromLines } from './decode/decoders'
|
import { decodeValueFromLines } from './decode/decoders'
|
||||||
import { LineCursor, toParsedLines } from './decode/scanner'
|
import { LineCursor, toParsedLines } from './decode/scanner'
|
||||||
|
|||||||
28
src/shared/literal-utils.ts
Normal file
28
src/shared/literal-utils.ts
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import { FALSE_LITERAL, NULL_LITERAL, TRUE_LITERAL } from '../constants'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a token is a boolean or null literal (`true`, `false`, `null`).
|
||||||
|
*/
|
||||||
|
export function isBooleanOrNullLiteral(token: string): boolean {
|
||||||
|
return token === TRUE_LITERAL || token === FALSE_LITERAL || token === NULL_LITERAL
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a token represents a valid numeric literal.
|
||||||
|
*
|
||||||
|
* @remarks
|
||||||
|
* Rejects numbers with leading zeros (except `"0"` itself or decimals like `"0.5"`).
|
||||||
|
*/
|
||||||
|
export function isNumericLiteral(token: string): boolean {
|
||||||
|
if (!token)
|
||||||
|
return false
|
||||||
|
|
||||||
|
// Must not have leading zeros (except for `"0"` itself or decimals like `"0.5"`)
|
||||||
|
if (token.length > 1 && token[0] === '0' && token[1] !== '.') {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if it's a valid number
|
||||||
|
const num = Number(token)
|
||||||
|
return !Number.isNaN(num) && Number.isFinite(num)
|
||||||
|
}
|
||||||
127
src/shared/string-utils.ts
Normal file
127
src/shared/string-utils.ts
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
import { BACKSLASH, CARRIAGE_RETURN, DOUBLE_QUOTE, NEWLINE, TAB } from '../constants'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Escapes special characters in a string for encoding.
|
||||||
|
*
|
||||||
|
* @remarks
|
||||||
|
* Handles backslashes, quotes, newlines, carriage returns, and tabs.
|
||||||
|
*/
|
||||||
|
export function escapeString(value: string): string {
|
||||||
|
return value
|
||||||
|
.replace(/\\/g, `${BACKSLASH}${BACKSLASH}`)
|
||||||
|
.replace(/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`)
|
||||||
|
.replace(/\n/g, `${BACKSLASH}n`)
|
||||||
|
.replace(/\r/g, `${BACKSLASH}r`)
|
||||||
|
.replace(/\t/g, `${BACKSLASH}t`)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unescapes a string by processing escape sequences.
|
||||||
|
*
|
||||||
|
* @remarks
|
||||||
|
* Handles `\n`, `\t`, `\r`, `\\`, and `\"` escape sequences.
|
||||||
|
*/
|
||||||
|
export function unescapeString(value: string): string {
|
||||||
|
let result = ''
|
||||||
|
let i = 0
|
||||||
|
|
||||||
|
while (i < value.length) {
|
||||||
|
if (value[i] === BACKSLASH) {
|
||||||
|
if (i + 1 >= value.length) {
|
||||||
|
throw new SyntaxError('Invalid escape sequence: backslash at end of string')
|
||||||
|
}
|
||||||
|
|
||||||
|
const next = value[i + 1]
|
||||||
|
if (next === 'n') {
|
||||||
|
result += NEWLINE
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if (next === 't') {
|
||||||
|
result += TAB
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if (next === 'r') {
|
||||||
|
result += CARRIAGE_RETURN
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if (next === BACKSLASH) {
|
||||||
|
result += BACKSLASH
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if (next === DOUBLE_QUOTE) {
|
||||||
|
result += DOUBLE_QUOTE
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new SyntaxError(`Invalid escape sequence: \\${next}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
result += value[i]
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the index of the closing double quote in a string, accounting for escape sequences.
|
||||||
|
*
|
||||||
|
* @param content The string to search in
|
||||||
|
* @param start The index of the opening quote
|
||||||
|
* @returns The index of the closing quote, or -1 if not found
|
||||||
|
*/
|
||||||
|
export function findClosingQuote(content: string, start: number): number {
|
||||||
|
let i = start + 1
|
||||||
|
while (i < content.length) {
|
||||||
|
if (content[i] === BACKSLASH && i + 1 < content.length) {
|
||||||
|
// Skip escaped character
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if (content[i] === DOUBLE_QUOTE) {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
return -1 // Not found
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the index of a specific character outside of quoted sections.
|
||||||
|
*
|
||||||
|
* @param content The string to search in
|
||||||
|
* @param char The character to look for
|
||||||
|
* @param start Optional starting index (defaults to 0)
|
||||||
|
* @returns The index of the character, or -1 if not found outside quotes
|
||||||
|
*/
|
||||||
|
export function findUnquotedChar(content: string, char: string, start = 0): number {
|
||||||
|
let inQuotes = false
|
||||||
|
let i = start
|
||||||
|
|
||||||
|
while (i < content.length) {
|
||||||
|
if (content[i] === BACKSLASH && i + 1 < content.length && inQuotes) {
|
||||||
|
// Skip escaped character
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (content[i] === DOUBLE_QUOTE) {
|
||||||
|
inQuotes = !inQuotes
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (content[i] === char && !inQuotes) {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1
|
||||||
|
}
|
||||||
84
src/shared/validation.ts
Normal file
84
src/shared/validation.ts
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
import { COMMA, LIST_ITEM_MARKER } from '../constants'
|
||||||
|
import { isBooleanOrNullLiteral } from './literal-utils'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a key can be used without quotes.
|
||||||
|
*
|
||||||
|
* @remarks
|
||||||
|
* Valid unquoted keys must start with a letter or underscore,
|
||||||
|
* followed by letters, digits, underscores, or dots.
|
||||||
|
*/
|
||||||
|
export function isValidUnquotedKey(key: string): boolean {
|
||||||
|
return /^[A-Z_][\w.]*$/i.test(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines if a string value can be safely encoded without quotes.
|
||||||
|
*
|
||||||
|
* @remarks
|
||||||
|
* A string needs quoting if it:
|
||||||
|
* - Is empty
|
||||||
|
* - Has leading or trailing whitespace
|
||||||
|
* - Could be confused with a literal (boolean, null, number)
|
||||||
|
* - Contains structural characters (colons, brackets, braces)
|
||||||
|
* - Contains quotes or backslashes (need escaping)
|
||||||
|
* - Contains control characters (newlines, tabs, etc.)
|
||||||
|
* - Contains the active delimiter
|
||||||
|
* - Starts with a list marker (hyphen)
|
||||||
|
*/
|
||||||
|
export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
|
||||||
|
if (!value) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if (value !== value.trim()) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if it looks like any literal value (boolean, null, or numeric)
|
||||||
|
if (isBooleanOrNullLiteral(value) || isNumericLike(value)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for colon (always structural)
|
||||||
|
if (value.includes(':')) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for quotes and backslash (always need escaping)
|
||||||
|
if (value.includes('"') || value.includes('\\')) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for brackets and braces (always structural)
|
||||||
|
if (/[[\]{}]/.test(value)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for control characters (newline, carriage return, tab - always need quoting/escaping)
|
||||||
|
if (/[\n\r\t]/.test(value)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for the active delimiter
|
||||||
|
if (value.includes(delimiter)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for hyphen at start (list marker)
|
||||||
|
if (value.startsWith(LIST_ITEM_MARKER)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a string looks like a number.
|
||||||
|
*
|
||||||
|
* @remarks
|
||||||
|
* Match numbers like `42`, `-3.14`, `1e-6`, `05`, etc.
|
||||||
|
*/
|
||||||
|
function isNumericLike(value: string): boolean {
|
||||||
|
return /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i.test(value) || /^0\d+$/.test(value)
|
||||||
|
}
|
||||||
@@ -32,6 +32,14 @@ describe('primitives', () => {
|
|||||||
expect(decode('null')).toBe(null)
|
expect(decode('null')).toBe(null)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('treats unquoted invalid numeric formats as strings', () => {
|
||||||
|
expect(decode('05')).toBe('05')
|
||||||
|
expect(decode('007')).toBe('007')
|
||||||
|
expect(decode('0123')).toBe('0123')
|
||||||
|
expect(decode('a: 05')).toEqual({ a: '05' })
|
||||||
|
expect(decode('nums[3]: 05,007,0123')).toEqual({ nums: ['05', '007', '0123'] })
|
||||||
|
})
|
||||||
|
|
||||||
it('respects ambiguity quoting (quoted primitives remain strings)', () => {
|
it('respects ambiguity quoting (quoted primitives remain strings)', () => {
|
||||||
expect(decode('"true"')).toBe('true')
|
expect(decode('"true"')).toBe('true')
|
||||||
expect(decode('"false"')).toBe('false')
|
expect(decode('"false"')).toBe('false')
|
||||||
|
|||||||
Reference in New Issue
Block a user