test: add case for unquoted invalid numeric formats as strings

This commit is contained in:
Johann Schopplich
2025-10-29 13:05:42 +01:00
parent b034c4455e
commit ee31be3bdc
12 changed files with 292 additions and 364 deletions

View File

@@ -1,35 +1,9 @@
import type {
ArrayHeaderInfo,
Delimiter,
Depth,
JsonArray,
JsonObject,
JsonPrimitive,
JsonValue,
ParsedLine,
ResolvedDecodeOptions,
} from '../types'
import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner'
import {
COLON,
DEFAULT_DELIMITER,
LIST_ITEM_PREFIX,
} from '../constants'
import {
isArrayHeaderAfterHyphen,
isObjectFirstFieldAfterHyphen,
mapRowValuesToPrimitives,
parseArrayHeaderLine,
parseDelimitedValues,
parseKeyToken,
parsePrimitiveToken,
} from './parser'
import { findClosingQuote } from './utils'
import {
assertExpectedCount,
validateNoExtraListItems,
validateNoExtraTabularRows,
} from './validation'
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
import { findClosingQuote } from '../shared/string-utils'
import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
// #region Entry decoding

View File

@@ -1,27 +1,7 @@
import type {
ArrayHeaderInfo,
Delimiter,
JsonPrimitive,
} from '../types'
import {
BACKSLASH,
CARRIAGE_RETURN,
CLOSE_BRACE,
CLOSE_BRACKET,
COLON,
DELIMITERS,
DOUBLE_QUOTE,
FALSE_LITERAL,
HASH,
NEWLINE,
NULL_LITERAL,
OPEN_BRACE,
OPEN_BRACKET,
PIPE,
TAB,
TRUE_LITERAL,
} from '../constants'
import { findClosingQuote, hasUnquotedChar } from './utils'
import type { ArrayHeaderInfo, Delimiter, JsonPrimitive } from '../types'
import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, HASH, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
import { isBooleanOrNullLiteral, isNumericLiteral } from '../shared/literal-utils'
import { findClosingQuote, findUnquotedChar, unescapeString } from '../shared/string-utils'
// #region Array header parsing
@@ -224,24 +204,6 @@ export function parsePrimitiveToken(token: string): JsonPrimitive {
return trimmed
}
/**
 * Tells whether a token is exactly one of the three keyword literals.
 *
 * @param token The token text to inspect
 * @returns `true` when the token strictly equals `TRUE_LITERAL`, `FALSE_LITERAL`, or `NULL_LITERAL`
 */
export function isBooleanOrNullLiteral(token: string): boolean {
  switch (token) {
    case TRUE_LITERAL:
    case FALSE_LITERAL:
    case NULL_LITERAL:
      return true
    default:
      return false
  }
}
/**
 * Checks if a token represents a valid numeric literal.
 *
 * @remarks
 * - Empty tokens and tokens with leading/trailing whitespace are rejected
 *   (`Number()` would otherwise coerce a blank string such as `' '` to `0`).
 * - Leading zeros are rejected (except for `0` itself or decimals like `0.5`).
 *   A single leading sign is ignored for this check, so `-05` is judged like `05`.
 * - Anything else is accepted when `Number()` yields a finite, non-NaN value.
 *
 * @param token The token text to inspect
 * @returns `true` if the token should be read as a number
 */
export function isNumericLiteral(token: string): boolean {
  if (!token || token !== token.trim())
    return false

  // Look past one leading sign so signed tokens follow the same leading-zero rule
  const unsigned = token[0] === '-' || token[0] === '+' ? token.slice(1) : token

  // Must not have leading zeros (except for "0" itself or decimals like "0.5")
  if (unsigned.length > 1 && unsigned[0] === '0' && unsigned[1] !== '.') {
    return false
  }

  // Check if it's a valid number
  const num = Number(token)
  return !Number.isNaN(num) && Number.isFinite(num)
}
export function parseStringLiteral(token: string): string {
const trimmed = token.trim()
@@ -265,53 +227,6 @@ export function parseStringLiteral(token: string): string {
return trimmed
}
/**
 * Resolves the backslash escape sequences in `value` into the characters they stand for.
 *
 * @remarks
 * Recognised sequences are `\n`, `\t`, `\r`, `\\`, and `\"`.
 *
 * @param value The string whose escape sequences should be resolved
 * @returns The string with every recognised escape replaced
 * @throws SyntaxError if a backslash is the last character or is followed by an unsupported character
 */
export function unescapeString(value: string): string {
  let output = ''

  for (let pos = 0; pos < value.length; pos++) {
    const current = value[pos]

    // Ordinary characters are copied through unchanged
    if (current !== BACKSLASH) {
      output += current
      continue
    }

    if (pos + 1 >= value.length) {
      throw new SyntaxError('Invalid escape sequence: backslash at end of string')
    }

    const escaped = value[pos + 1]
    switch (escaped) {
      case 'n':
        output += NEWLINE
        break
      case 't':
        output += TAB
        break
      case 'r':
        output += CARRIAGE_RETURN
        break
      case BACKSLASH:
        output += BACKSLASH
        break
      case DOUBLE_QUOTE:
        output += DOUBLE_QUOTE
        break
      default:
        throw new SyntaxError(`Invalid escape sequence: \\${escaped}`)
    }

    // The loop increment consumes the backslash; this consumes the escaped character
    pos++
  }

  return output
}
export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
let end = start
while (end < content.length && content[end] !== COLON) {
@@ -367,11 +282,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
// #region Array content detection helpers
/**
 * Reports whether `content`, once trimmed, starts with `OPEN_BRACKET` and also
 * contains a `COLON` outside of quoted sections.
 *
 * NOTE(review): the body holds two consecutive `return` statements, which looks like the
 * before/after lines of a diff hunk shown together. As written, only the first is reachable.
 *
 * @param content The text to inspect (presumably what follows a list-item hyphen; confirm against callers)
 * @returns `true` when both conditions hold
 */
export function isArrayHeaderAfterHyphen(content: string): boolean {
// Variant using the boolean helper `hasUnquotedChar`
return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON)
// Variant using `findUnquotedChar` directly; `-1` means no unquoted colon was found
return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1
}
/**
 * Reports whether `content` contains a `COLON` outside of quoted sections.
 *
 * NOTE(review): the body holds two consecutive `return` statements, which looks like the
 * before/after lines of a diff hunk shown together. As written, only the first is reachable.
 *
 * @param content The text to inspect (presumably what follows a list-item hyphen; confirm against callers)
 * @returns `true` when an unquoted colon is present
 */
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
// Variant using the boolean helper `hasUnquotedChar`
return hasUnquotedChar(content, COLON)
// Variant using `findUnquotedChar` directly; `-1` means no unquoted colon was found
return findUnquotedChar(content, COLON) !== -1
}
// #endregion

View File

@@ -1,96 +0,0 @@
import { BACKSLASH, DOUBLE_QUOTE } from '../constants'
/**
 * Locates the double quote that closes a quoted section, stepping over escaped characters.
 *
 * @param content The string to scan
 * @param start The index of the opening quote; scanning begins at the following character
 * @returns The index of the closing quote, or -1 when none is found
 */
export function findClosingQuote(content: string, start: number): number {
  const length = content.length

  for (let pos = start + 1; pos < length; pos++) {
    const current = content[pos]

    if (current === BACKSLASH && pos + 1 < length) {
      // Together with the loop increment this steps over the escaped character
      pos++
      continue
    }

    if (current === DOUBLE_QUOTE) {
      return pos
    }
  }

  return -1 // Not found
}
/**
 * Tells whether a character occurs anywhere in a string outside of quoted sections.
 *
 * @param content The string to inspect
 * @param char The character to look for
 * @returns `true` when `findUnquotedChar` reports a match, `false` otherwise
 */
export function hasUnquotedChar(content: string, char: string): boolean {
  const position = findUnquotedChar(content, char)
  return position >= 0
}
/**
 * Locates the first occurrence of a character that lies outside of quoted sections.
 *
 * @remarks
 * Each double quote toggles the quoted state. While inside quotes, a backslash
 * causes the following character to be skipped.
 *
 * @param content The string to scan
 * @param char The character to look for
 * @param start Optional index to begin scanning at (defaults to 0)
 * @returns The index of the match, or -1 when the character does not occur outside quotes
 */
export function findUnquotedChar(content: string, char: string, start = 0): number {
  let insideQuotes = false

  for (let pos = start; pos < content.length; pos++) {
    const current = content[pos]

    if (insideQuotes && current === BACKSLASH && pos + 1 < content.length) {
      // Together with the loop increment this steps over the escaped character
      pos++
      continue
    }

    if (current === DOUBLE_QUOTE) {
      insideQuotes = !insideQuotes
      continue
    }

    if (!insideQuotes && current === char) {
      return pos
    }
  }

  return -1
}
/**
 * Tells whether a string, ignoring surrounding whitespace, is wrapped in double quotes.
 *
 * @param content The string to inspect
 * @returns `true` when the trimmed string has at least two characters and both
 * begins and ends with a double quote
 */
export function isQuotedString(content: string): boolean {
  const text = content.trim()

  // A lone quote character is not a quoted string
  if (text.length < 2) {
    return false
  }

  return text.startsWith(DOUBLE_QUOTE) && text.endsWith(DOUBLE_QUOTE)
}
/**
 * Advances past a run of whitespace characters.
 *
 * @param content The string to scan
 * @param start The index to begin at
 * @returns The index of the first non-whitespace character at or after `start`,
 * or `content.length` when only whitespace remains
 */
export function skipWhitespace(content: string, start: number): number {
  const whitespace = /\s/
  let pos = start

  for (; pos < content.length; pos++) {
    if (!whitespace.test(content[pos]!)) {
      break
    }
  }

  return pos
}

View File

@@ -7,7 +7,7 @@ import { COLON, LIST_ITEM_PREFIX } from '../constants'
*
* @param actual The actual count
* @param expected The expected count
* @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows')
* @param itemType The type of items being counted (e.g., `list array items`, `tabular rows`)
* @param options Decode options
* @throws RangeError if counts don't match in strict mode
*/
@@ -44,31 +44,6 @@ export function validateNoExtraListItems(
}
}
/**
 * Decides whether a line inside a tabular array is a data row rather than a key-value pair.
 *
 * @remarks
 * The decision uses only the first plain occurrence of the colon and of the delimiter.
 * A line is a data row when it has no colon, or when the delimiter appears before the colon.
 *
 * @param content The line content
 * @param delimiter The delimiter used in the table
 * @returns `true` for a data row, `false` for a key-value pair
 */
export function isDataRow(content: string, delimiter: Delimiter): boolean {
  const colonIndex = content.indexOf(COLON)

  // Without a colon the line cannot be a key-value pair
  if (colonIndex === -1) {
    return true
  }

  // With a colon present, only a delimiter that precedes it marks a data row
  const delimiterIndex = content.indexOf(delimiter)
  return delimiterIndex !== -1 && delimiterIndex < colonIndex
}
/**
* Validates that there are no extra tabular rows beyond the expected count.
*
@@ -95,3 +70,28 @@ export function validateNoExtraTabularRows(
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
}
}
/**
 * Decides whether a line inside a tabular array is a data row rather than a key-value pair.
 *
 * @remarks
 * The decision uses only the first plain occurrence of the colon and of the delimiter.
 * A line is a data row when it has no colon, or when the delimiter appears before the colon.
 *
 * @param content The line content
 * @param delimiter The delimiter used in the table
 * @returns `true` for a data row, `false` for a key-value pair
 */
function isDataRow(content: string, delimiter: Delimiter): boolean {
  const colonIndex = content.indexOf(COLON)

  // Without a colon the line cannot be a key-value pair
  if (colonIndex === -1) {
    return true
  }

  // With a colon present, only a delimiter that precedes it marks a data row
  const delimiterIndex = content.indexOf(delimiter)
  return delimiterIndex !== -1 && delimiterIndex < colonIndex
}

View File

@@ -1,26 +1,7 @@
import type {
Depth,
JsonArray,
JsonObject,
JsonPrimitive,
JsonValue,
ResolvedEncodeOptions,
} from '../types'
import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types'
import { LIST_ITEM_MARKER } from '../constants'
import {
isArrayOfArrays,
isArrayOfObjects,
isArrayOfPrimitives,
isJsonArray,
isJsonObject,
isJsonPrimitive,
} from './normalize'
import {
encodeAndJoinPrimitives,
encodeKey,
encodePrimitive,
formatHeader,
} from './primitives'
import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize'
import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives'
import { LineWriter } from './writer'
// #region Encode normalized JsonValue

View File

@@ -1,9 +1,4 @@
import type {
JsonArray,
JsonObject,
JsonPrimitive,
JsonValue,
} from '../types'
import type { JsonArray, JsonObject, JsonPrimitive, JsonValue } from '../types'
// #region Normalization (unknown → JsonValue)

View File

@@ -1,14 +1,7 @@
import type { JsonPrimitive } from '../types'
import {
BACKSLASH,
COMMA,
DEFAULT_DELIMITER,
DOUBLE_QUOTE,
FALSE_LITERAL,
LIST_ITEM_MARKER,
NULL_LITERAL,
TRUE_LITERAL,
} from '../constants'
import { COMMA, DEFAULT_DELIMITER, DOUBLE_QUOTE, NULL_LITERAL } from '../constants'
import { escapeString } from '../shared/string-utils'
import { isSafeUnquoted, isValidUnquotedKey } from '../shared/validation'
// #region Primitive encoding
@@ -36,74 +29,6 @@ export function encodeStringLiteral(value: string, delimiter: string = COMMA): s
return `${DOUBLE_QUOTE}${escapeString(value)}${DOUBLE_QUOTE}`
}
/**
 * Escapes the characters of a string that cannot appear verbatim inside a quoted literal.
 *
 * @remarks
 * Replacements run in a fixed order: backslashes first, then double quotes,
 * newlines, carriage returns, and tabs. Backslashes go first so the backslashes
 * introduced by later steps are not escaped again.
 *
 * @param value The raw string
 * @returns The escaped string
 */
export function escapeString(value: string): string {
  const replacements: [RegExp, string][] = [
    [/\\/g, `${BACKSLASH}${BACKSLASH}`],
    [/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`],
    [/\n/g, `${BACKSLASH}n`],
    [/\r/g, `${BACKSLASH}r`],
    [/\t/g, `${BACKSLASH}t`],
  ]

  let escaped = value
  for (const [pattern, replacement] of replacements) {
    escaped = escaped.replace(pattern, replacement)
  }
  return escaped
}
/**
 * Decides whether a string value can be written without surrounding quotes.
 *
 * @remarks
 * Quoting is required when the value is empty, is padded with whitespace, equals a
 * boolean/null literal, looks numeric, contains a colon, quote, backslash, bracket,
 * brace, newline, carriage return or tab, contains the active delimiter, or starts
 * with the list item marker.
 *
 * @param value The string to inspect
 * @param delimiter The active delimiter (defaults to `COMMA`)
 * @returns `true` when none of the quoting conditions apply
 */
export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
  if (!value || isPaddedWithWhitespace(value)) {
    return false
  }

  // Values that would be read back as a literal instead of a string
  const isKeywordLiteral = value === TRUE_LITERAL || value === FALSE_LITERAL || value === NULL_LITERAL
  if (isKeywordLiteral || isNumericLike(value)) {
    return false
  }

  // Colon, brackets and braces are structural; quotes and backslash need escaping;
  // newline, carriage return and tab always need quoting/escaping
  const hasReservedCharacter
    = value.includes(':')
      || value.includes('"')
      || value.includes('\\')
      || /[[\]{}]/.test(value)
      || /[\n\r\t]/.test(value)

  // The active delimiter and a leading list marker would be read as syntax
  const clashesWithSyntax = value.includes(delimiter) || value.startsWith(LIST_ITEM_MARKER)

  return !(hasReservedCharacter || clashesWithSyntax)
}
/**
 * Checks if a string looks like a number and therefore must be quoted to stay a string.
 *
 * @remarks
 * Matches the usual shapes (`42`, `-3.14`, `1e-6`) and leading-zero integers (`05`).
 * It also matches any other non-blank string that `Number()` converts to a finite
 * value (e.g. `.5`, `+5`, `5.`), since a `Number()`-based reader would otherwise turn
 * such an unquoted string into a number.
 *
 * @param value The string to inspect
 * @returns `true` when the string could be read as a number
 */
function isNumericLike(value: string): boolean {
  // Match numbers like: 42, -3.14, 1e-6, 05, etc.
  if (/^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i.test(value) || /^0\d+$/.test(value)) {
    return true
  }

  // Blank strings coerce to 0, so they are excluded before asking `Number()`
  return value.trim() !== '' && Number.isFinite(Number(value))
}
/**
 * Tells whether a string has leading or trailing whitespace.
 *
 * @param value The string to inspect
 * @returns `true` when trimming would remove at least one character
 */
function isPaddedWithWhitespace(value: string): boolean {
  const trimmed = value.trim()
  return trimmed.length !== value.length
}
// #endregion
// #region Key encoding
@@ -116,10 +41,6 @@ export function encodeKey(key: string): string {
return `${DOUBLE_QUOTE}${escapeString(key)}${DOUBLE_QUOTE}`
}
/**
 * Tells whether a key can be written without quotes.
 *
 * @remarks
 * The key must start with an ASCII letter or underscore and continue with
 * word characters (letters, digits, underscore) or dots only.
 *
 * @param key The key to inspect
 * @returns `true` when the whole key matches that shape
 */
function isValidUnquotedKey(key: string): boolean {
  const unquotedKeyPattern = /^[A-Z_][\w.]*$/i
  return unquotedKeyPattern.test(key)
}
// #endregion
// #region Value joining
@@ -132,9 +53,6 @@ export function encodeAndJoinPrimitives(values: readonly JsonPrimitive[], delimi
// #region Header formatters
/**
* Header formatter for arrays and tables with optional key prefix and field names
*/
export function formatHeader(
length: number,
options?: {

View File

@@ -1,10 +1,4 @@
import type {
DecodeOptions,
EncodeOptions,
JsonValue,
ResolvedDecodeOptions,
ResolvedEncodeOptions,
} from './types'
import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
import { DEFAULT_DELIMITER } from './constants'
import { decodeValueFromLines } from './decode/decoders'
import { LineCursor, toParsedLines } from './decode/scanner'

View File

@@ -0,0 +1,28 @@
import { FALSE_LITERAL, NULL_LITERAL, TRUE_LITERAL } from '../constants'
/**
 * Tells whether a token is exactly one of the keyword literals (`true`, `false`, `null`).
 *
 * @param token The token text to inspect
 * @returns `true` when the token strictly equals `TRUE_LITERAL`, `FALSE_LITERAL`, or `NULL_LITERAL`
 */
export function isBooleanOrNullLiteral(token: string): boolean {
  switch (token) {
    case TRUE_LITERAL:
    case FALSE_LITERAL:
    case NULL_LITERAL:
      return true
    default:
      return false
  }
}
/**
 * Checks if a token represents a valid numeric literal.
 *
 * @remarks
 * - Empty tokens and tokens with leading/trailing whitespace are rejected
 *   (`Number()` would otherwise coerce a blank string such as `' '` to `0`).
 * - Leading zeros are rejected (except for `"0"` itself or decimals like `"0.5"`).
 *   A single leading sign is ignored for this check, so `"-05"` is judged like `"05"`.
 * - Anything else is accepted when `Number()` yields a finite, non-NaN value.
 *
 * @param token The token text to inspect
 * @returns `true` if the token should be read as a number
 */
export function isNumericLiteral(token: string): boolean {
  if (!token || token !== token.trim())
    return false

  // Look past one leading sign so signed tokens follow the same leading-zero rule
  const unsigned = token[0] === '-' || token[0] === '+' ? token.slice(1) : token

  // Must not have leading zeros (except for `"0"` itself or decimals like `"0.5"`)
  if (unsigned.length > 1 && unsigned[0] === '0' && unsigned[1] !== '.') {
    return false
  }

  // Check if it's a valid number
  const num = Number(token)
  return !Number.isNaN(num) && Number.isFinite(num)
}

127
src/shared/string-utils.ts Normal file
View File

@@ -0,0 +1,127 @@
import { BACKSLASH, CARRIAGE_RETURN, DOUBLE_QUOTE, NEWLINE, TAB } from '../constants'
/**
 * Escapes the characters of a string that cannot appear verbatim inside a quoted literal.
 *
 * @remarks
 * Replacements run in a fixed order: backslashes first, then double quotes,
 * newlines, carriage returns, and tabs. Backslashes go first so the backslashes
 * introduced by later steps are not escaped again.
 *
 * @param value The raw string
 * @returns The escaped string
 */
export function escapeString(value: string): string {
  const replacements: [RegExp, string][] = [
    [/\\/g, `${BACKSLASH}${BACKSLASH}`],
    [/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`],
    [/\n/g, `${BACKSLASH}n`],
    [/\r/g, `${BACKSLASH}r`],
    [/\t/g, `${BACKSLASH}t`],
  ]

  let escaped = value
  for (const [pattern, replacement] of replacements) {
    escaped = escaped.replace(pattern, replacement)
  }
  return escaped
}
/**
 * Resolves the backslash escape sequences in `value` into the characters they stand for.
 *
 * @remarks
 * Recognised sequences are `\n`, `\t`, `\r`, `\\`, and `\"`.
 *
 * @param value The string whose escape sequences should be resolved
 * @returns The string with every recognised escape replaced
 * @throws SyntaxError if a backslash is the last character or is followed by an unsupported character
 */
export function unescapeString(value: string): string {
  let output = ''

  for (let pos = 0; pos < value.length; pos++) {
    const current = value[pos]

    // Ordinary characters are copied through unchanged
    if (current !== BACKSLASH) {
      output += current
      continue
    }

    if (pos + 1 >= value.length) {
      throw new SyntaxError('Invalid escape sequence: backslash at end of string')
    }

    const escaped = value[pos + 1]
    switch (escaped) {
      case 'n':
        output += NEWLINE
        break
      case 't':
        output += TAB
        break
      case 'r':
        output += CARRIAGE_RETURN
        break
      case BACKSLASH:
        output += BACKSLASH
        break
      case DOUBLE_QUOTE:
        output += DOUBLE_QUOTE
        break
      default:
        throw new SyntaxError(`Invalid escape sequence: \\${escaped}`)
    }

    // The loop increment consumes the backslash; this consumes the escaped character
    pos++
  }

  return output
}
/**
 * Locates the double quote that closes a quoted section, stepping over escaped characters.
 *
 * @param content The string to scan
 * @param start The index of the opening quote; scanning begins at the following character
 * @returns The index of the closing quote, or -1 when none is found
 */
export function findClosingQuote(content: string, start: number): number {
  const length = content.length

  for (let pos = start + 1; pos < length; pos++) {
    const current = content[pos]

    if (current === BACKSLASH && pos + 1 < length) {
      // Together with the loop increment this steps over the escaped character
      pos++
      continue
    }

    if (current === DOUBLE_QUOTE) {
      return pos
    }
  }

  return -1 // Not found
}
/**
 * Locates the first occurrence of a character that lies outside of quoted sections.
 *
 * @remarks
 * Each double quote toggles the quoted state. While inside quotes, a backslash
 * causes the following character to be skipped.
 *
 * @param content The string to scan
 * @param char The character to look for
 * @param start Optional index to begin scanning at (defaults to 0)
 * @returns The index of the match, or -1 when the character does not occur outside quotes
 */
export function findUnquotedChar(content: string, char: string, start = 0): number {
  let insideQuotes = false

  for (let pos = start; pos < content.length; pos++) {
    const current = content[pos]

    if (insideQuotes && current === BACKSLASH && pos + 1 < content.length) {
      // Together with the loop increment this steps over the escaped character
      pos++
      continue
    }

    if (current === DOUBLE_QUOTE) {
      insideQuotes = !insideQuotes
      continue
    }

    if (!insideQuotes && current === char) {
      return pos
    }
  }

  return -1
}

84
src/shared/validation.ts Normal file
View File

@@ -0,0 +1,84 @@
import { COMMA, LIST_ITEM_MARKER } from '../constants'
import { isBooleanOrNullLiteral } from './literal-utils'
/**
 * Tells whether a key can be written without quotes.
 *
 * @remarks
 * The key must start with an ASCII letter or underscore and continue with
 * word characters (letters, digits, underscore) or dots only.
 *
 * @param key The key to inspect
 * @returns `true` when the whole key matches that shape
 */
export function isValidUnquotedKey(key: string): boolean {
  const unquotedKeyPattern = /^[A-Z_][\w.]*$/i
  return unquotedKeyPattern.test(key)
}
/**
 * Decides whether a string value can be written without surrounding quotes.
 *
 * @remarks
 * Quoting is required when the value:
 * - is empty
 * - has leading or trailing whitespace
 * - equals a boolean/null literal or looks like a number
 * - contains a colon, bracket, or brace (structural characters)
 * - contains a double quote or backslash (need escaping)
 * - contains a newline, carriage return, or tab
 * - contains the active delimiter
 * - starts with the list item marker
 *
 * @param value The string to inspect
 * @param delimiter The active delimiter (defaults to `COMMA`)
 * @returns `true` when none of the quoting conditions apply
 */
export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
  if (!value || value !== value.trim()) {
    return false
  }

  // Values that would be read back as a literal instead of a string
  if (isBooleanOrNullLiteral(value) || isNumericLike(value)) {
    return false
  }

  // Colon, brackets and braces are structural; quotes and backslash need escaping;
  // newline, carriage return and tab always need quoting/escaping
  const hasReservedCharacter
    = value.includes(':')
      || value.includes('"')
      || value.includes('\\')
      || /[[\]{}]/.test(value)
      || /[\n\r\t]/.test(value)

  // The active delimiter and a leading list marker would be read as syntax
  const clashesWithSyntax = value.includes(delimiter) || value.startsWith(LIST_ITEM_MARKER)

  return !(hasReservedCharacter || clashesWithSyntax)
}
/**
 * Checks if a string looks like a number and therefore must be quoted to stay a string.
 *
 * @remarks
 * Matches the usual shapes (`42`, `-3.14`, `1e-6`) and leading-zero integers (`05`).
 * It also matches any other non-blank string that `Number()` converts to a finite
 * value (e.g. `.5`, `+5`, `5.`), since a `Number()`-based reader would otherwise turn
 * such an unquoted string into a number.
 *
 * @param value The string to inspect
 * @returns `true` when the string could be read as a number
 */
function isNumericLike(value: string): boolean {
  // Usual numeric shapes plus integers with leading zeros
  if (/^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i.test(value) || /^0\d+$/.test(value)) {
    return true
  }

  // Blank strings coerce to 0, so they are excluded before asking `Number()`
  return value.trim() !== '' && Number.isFinite(Number(value))
}

View File

@@ -32,6 +32,14 @@ describe('primitives', () => {
expect(decode('null')).toBe(null)
})
// Unquoted tokens with leading zeros are expected to decode as strings, not numbers.
it('treats unquoted invalid numeric formats as strings', () => {
// Bare tokens as the whole document
expect(decode('05')).toBe('05')
expect(decode('007')).toBe('007')
expect(decode('0123')).toBe('0123')
// The same token as an object field value
expect(decode('a: 05')).toEqual({ a: '05' })
// The same tokens as elements of an inline array
expect(decode('nums[3]: 05,007,0123')).toEqual({ nums: ['05', '007', '0123'] })
})
it('respects ambiguity quoting (quoted primitives remain strings)', () => {
expect(decode('"true"')).toBe('true')
expect(decode('"false"')).toBe('false')