test: add case for unquoted invalid numeric formats as strings

2026-01-29 23:34:10 +08:00 · 2025-10-29 13:05:42 +01:00
parent b034c4455e
commit ee31be3bdc
12 changed files with 292 additions and 364 deletions
--- a/src/decode/decoders.ts
+++ b/src/decode/decoders.ts
@@ -1,35 +1,9 @@
-import type {
+import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
  ArrayHeaderInfo,
  Delimiter,
  Depth,
  JsonArray,
  JsonObject,
  JsonPrimitive,
  JsonValue,
  ParsedLine,
  ResolvedDecodeOptions,
 } from '../types'
 import type { LineCursor } from './scanner'
-import {
+import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
-  COLON,
+import { findClosingQuote } from '../shared/string-utils'
-  DEFAULT_DELIMITER,
+import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
-  LIST_ITEM_PREFIX,
+import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
 } from '../constants'
 import {
  isArrayHeaderAfterHyphen,
  isObjectFirstFieldAfterHyphen,
  mapRowValuesToPrimitives,
  parseArrayHeaderLine,
  parseDelimitedValues,
  parseKeyToken,
  parsePrimitiveToken,
 } from './parser'
 import { findClosingQuote } from './utils'
 import {
  assertExpectedCount,
  validateNoExtraListItems,
  validateNoExtraTabularRows,
 } from './validation'
 // #region Entry decoding
--- a/src/decode/parser.ts
+++ b/src/decode/parser.ts
@@ -1,27 +1,7 @@
-import type {
+import type { ArrayHeaderInfo, Delimiter, JsonPrimitive } from '../types'
-  ArrayHeaderInfo,
+import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, HASH, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
-  Delimiter,
+import { isBooleanOrNullLiteral, isNumericLiteral } from '../shared/literal-utils'
-  JsonPrimitive,
+import { findClosingQuote, findUnquotedChar, unescapeString } from '../shared/string-utils'
 } from '../types'
 import {
  BACKSLASH,
  CARRIAGE_RETURN,
  CLOSE_BRACE,
  CLOSE_BRACKET,
  COLON,
  DELIMITERS,
  DOUBLE_QUOTE,
  FALSE_LITERAL,
  HASH,
  NEWLINE,
  NULL_LITERAL,
  OPEN_BRACE,
  OPEN_BRACKET,
  PIPE,
  TAB,
  TRUE_LITERAL,
 } from '../constants'
 import { findClosingQuote, hasUnquotedChar } from './utils'
 // #region Array header parsing
@@ -224,24 +204,6 @@ export function parsePrimitiveToken(token: string): JsonPrimitive {
  return trimmed
 }
 /**
 * Reports whether a token is exactly one of the keyword literals
 * `TRUE_LITERAL`, `FALSE_LITERAL` or `NULL_LITERAL`.
 *
 * @param token The token text to classify
 * @returns true when the token equals one of the three literals
 */
 export function isBooleanOrNullLiteral(token: string): boolean {
  switch (token) {
    case TRUE_LITERAL:
    case FALSE_LITERAL:
    case NULL_LITERAL:
      return true
    default:
      return false
  }
 }
 /**
 * Checks if a token represents a valid numeric literal.
 *
 * @remarks
 * Blank tokens are rejected, as are integers written with leading zeros,
 * signed or not (`"05"`, `"-007"`). `"0"` itself and decimals such as `"0.5"`
 * or `"-0.5"` stay valid. Everything else is decided by `Number()`, and only
 * finite results count.
 *
 * @param token The raw token text
 * @returns true if the token should be treated as a number
 */
 export function isNumericLiteral(token: string): boolean {
  // `Number('')` and `Number('  ')` are both 0, so blank input must be rejected explicitly
  if (token.trim() === '')
    return false
  // Look past one optional sign so that `-05` is treated like `05`
  const unsigned = token[0] === '-' || token[0] === '+' ? token.slice(1) : token
  // Must not have leading zeros (except for "0" itself or decimals like "0.5")
  if (unsigned.length > 1 && unsigned[0] === '0' && unsigned[1] !== '.')
    return false
  // Number.isFinite is false for NaN as well as for +/-Infinity
  return Number.isFinite(Number(token))
 }
 export function parseStringLiteral(token: string): string {
  const trimmed = token.trim()
@@ -265,53 +227,6 @@ export function parseStringLiteral(token: string): string {
  return trimmed
 }
 /**
 * Decodes backslash escape sequences in `value`.
 *
 * @remarks
 * A `BACKSLASH` followed by `n`, `t`, `r`, `BACKSLASH` or `DOUBLE_QUOTE` becomes
 * `NEWLINE`, `TAB`, `CARRIAGE_RETURN`, `BACKSLASH` or `DOUBLE_QUOTE` respectively.
 *
 * @param value The escaped text
 * @returns The text with every escape sequence replaced
 * @throws SyntaxError if a backslash ends the string or precedes an unsupported character
 */
 export function unescapeString(value: string): string {
  let decoded = ''
  for (let pos = 0; pos < value.length; pos++) {
    const current = value[pos]
    if (current !== BACKSLASH) {
      decoded += current
      continue
    }
    if (pos + 1 >= value.length) {
      throw new SyntaxError('Invalid escape sequence: backslash at end of string')
    }
    // Step onto the escaped character; the loop increment then moves past it
    pos++
    const escaped = value[pos]
    switch (escaped) {
      case 'n':
        decoded += NEWLINE
        break
      case 't':
        decoded += TAB
        break
      case 'r':
        decoded += CARRIAGE_RETURN
        break
      case BACKSLASH:
        decoded += BACKSLASH
        break
      case DOUBLE_QUOTE:
        decoded += DOUBLE_QUOTE
        break
      default:
        throw new SyntaxError(`Invalid escape sequence: \\${escaped}`)
    }
  }
  return decoded
 }
 export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
  let end = start
  while (end < content.length && content[end] !== COLON) {
@@ -367,11 +282,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
 // #region Array content detection helpers
 /**
 * Reports whether `content` looks like an array header: its trimmed form starts
 * with `OPEN_BRACKET` and a `COLON` occurs outside double-quoted sections.
 *
 * NOTE(review): judging by the name, `content` is the text that follows a
 * list-item hyphen; the caller is not visible here, so confirm.
 *
 * @param content The text to inspect
 * @returns true when both conditions hold
 */
 export function isArrayHeaderAfterHyphen(content: string): boolean {
-  return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON)
+  return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1
 }
 /**
 * Reports whether `content` contains a `COLON` outside double-quoted sections.
 *
 * NOTE(review): judging by the name, a match means the text after a list-item
 * hyphen is the first `key: value` field of an object; confirm against the caller.
 *
 * @param content The text to inspect
 * @returns true when an unquoted colon is present
 */
 export function isObjectFirstFieldAfterHyphen(content: string): boolean {
-  return hasUnquotedChar(content, COLON)
+  return findUnquotedChar(content, COLON) !== -1
 }
 // #endregion
--- a/src/decode/utils.ts
+++ b/src/decode/utils.ts
@@ -1,96 +0,0 @@
 import { BACKSLASH, DOUBLE_QUOTE } from '../constants'
 /**
 * Locates the double quote that closes a quoted section, honoring backslash escapes.
 *
 * @param content The string to search in
 * @param start The index of the opening quote; scanning begins one past it
 * @returns The index of the closing quote, or -1 if there is none
 */
 export function findClosingQuote(content: string, start: number): number {
  for (let pos = start + 1; pos < content.length; pos++) {
    const current = content[pos]
    if (current === BACKSLASH && pos + 1 < content.length) {
      // Jump over the escaped character (the loop increment supplies the second step)
      pos++
      continue
    }
    if (current === DOUBLE_QUOTE) {
      return pos
    }
  }
  // Ran off the end without meeting an unescaped quote
  return -1
 }
 /**
 * Tells whether a character occurs anywhere outside the quoted sections of a string.
 *
 * @param content The string to check
 * @param char The character to look for
 * @returns true if `findUnquotedChar` reports a position, false if it reports -1
 */
 export function hasUnquotedChar(content: string, char: string): boolean {
  const position = findUnquotedChar(content, char)
  return position !== -1
 }
 /**
 * Returns the position of the first occurrence of `char` that lies outside double quotes.
 *
 * @remarks
 * Backslash escapes are only honored while inside a quoted section.
 *
 * @param content The string to search in
 * @param char The character to look for
 * @param start Optional starting index (defaults to 0)
 * @returns The index of the character, or -1 if it does not occur outside quotes
 */
 export function findUnquotedChar(content: string, char: string, start = 0): number {
  let quoted = false
  for (let pos = start; pos < content.length; pos++) {
    const current = content[pos]
    if (quoted && current === BACKSLASH && pos + 1 < content.length) {
      // Jump over the escaped character (the loop increment supplies the second step)
      pos++
      continue
    }
    if (current === DOUBLE_QUOTE) {
      quoted = !quoted
      continue
    }
    if (!quoted && current === char) {
      return pos
    }
  }
  return -1
 }
 /**
 * Tells whether a string, ignoring surrounding whitespace, is wrapped in double quotes.
 *
 * @param content The string to check
 * @returns true if the trimmed text has at least two characters and both ends are `DOUBLE_QUOTE`
 */
 export function isQuotedString(content: string): boolean {
  const text = content.trim()
  // A lone quote character is not a quoted string
  if (text.length < 2) {
    return false
  }
  return text.startsWith(DOUBLE_QUOTE) && text.endsWith(DOUBLE_QUOTE)
 }
 /**
 * Advances past a run of whitespace characters.
 *
 * @param content The string to process
 * @param start The index to begin at
 * @returns The index of the first non-whitespace character at or after `start`,
 * or `content.length` if only whitespace remains
 */
 export function skipWhitespace(content: string, start: number): number {
  const whitespace = /\s/
  let pos = start
  for (; pos < content.length; pos++) {
    if (!whitespace.test(content[pos]!)) {
      break
    }
  }
  return pos
 }
--- a/src/decode/validation.ts
+++ b/src/decode/validation.ts
@@ -7,7 +7,7 @@ import { COLON, LIST_ITEM_PREFIX } from '../constants'
 *
 * @param actual The actual count
 * @param expected The expected count
- * @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows')
+ * @param itemType The type of items being counted (e.g., `list array items`, `tabular rows`)
 * @param options Decode options
 * @throws RangeError if counts don't match in strict mode
 */
@@ -44,31 +44,6 @@ export function validateNoExtraListItems(
  }
 }
 /**
 * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
 *
 * @remarks
 * Colons and delimiters inside double-quoted sections are ignored, so `"12:30",x`
 * is still a data row and `"a,b": 1` is still a key-value pair.
 *
 * @param content The line content
 * @param delimiter The delimiter used in the table
 * @returns true if the line is a data row, false if it's a key-value pair
 */
 export function isDataRow(content: string, delimiter: Delimiter): boolean {
  // First index of `needle` outside double quotes, or -1 when there is none
  const indexOutsideQuotes = (needle: string): number => {
    let inQuotes = false
    for (let i = 0; i < content.length; i++) {
      const ch = content[i]
      if (inQuotes && ch === '\\') {
        // Skip the escaped character so an escaped quote does not end the section
        i++
        continue
      }
      if (ch === '"') {
        inQuotes = !inQuotes
        continue
      }
      if (!inQuotes && content.startsWith(needle, i)) {
        return i
      }
    }
    return -1
  }
  const colonPos = indexOutsideQuotes(COLON)
  const delimiterPos = indexOutsideQuotes(delimiter)
  // No colon = definitely a data row
  if (colonPos === -1) {
    return true
  }
  // Delimiter before the colon = data row; colon first or no delimiter = key-value pair
  return delimiterPos !== -1 && delimiterPos < colonPos
 }
 /**
 * Validates that there are no extra tabular rows beyond the expected count.
 *
@@ -95,3 +70,28 @@ export function validateNoExtraTabularRows(
    throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
  }
 }
 /**
 * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
 *
 * @remarks
 * Colons and delimiters inside double-quoted sections are ignored, so `"12:30",x`
 * is still a data row and `"a,b": 1` is still a key-value pair.
 *
 * @param content The line content
 * @param delimiter The delimiter used in the table
 * @returns true if the line is a data row, false if it's a key-value pair
 */
 function isDataRow(content: string, delimiter: Delimiter): boolean {
  // First index of `needle` outside double quotes, or -1 when there is none
  const indexOutsideQuotes = (needle: string): number => {
    let inQuotes = false
    for (let i = 0; i < content.length; i++) {
      const ch = content[i]
      if (inQuotes && ch === '\\') {
        // Skip the escaped character so an escaped quote does not end the section
        i++
        continue
      }
      if (ch === '"') {
        inQuotes = !inQuotes
        continue
      }
      if (!inQuotes && content.startsWith(needle, i)) {
        return i
      }
    }
    return -1
  }
  const colonPos = indexOutsideQuotes(COLON)
  const delimiterPos = indexOutsideQuotes(delimiter)
  // No colon = definitely a data row
  if (colonPos === -1) {
    return true
  }
  // Delimiter before the colon = data row; colon first or no delimiter = key-value pair
  return delimiterPos !== -1 && delimiterPos < colonPos
 }
--- a/src/encode/encoders.ts
+++ b/src/encode/encoders.ts
@@ -1,26 +1,7 @@
-import type {
+import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types'
  Depth,
  JsonArray,
  JsonObject,
  JsonPrimitive,
  JsonValue,
  ResolvedEncodeOptions,
 } from '../types'
 import { LIST_ITEM_MARKER } from '../constants'
-import {
+import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize'
-  isArrayOfArrays,
+import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives'
  isArrayOfObjects,
  isArrayOfPrimitives,
  isJsonArray,
  isJsonObject,
  isJsonPrimitive,
 } from './normalize'
 import {
  encodeAndJoinPrimitives,
  encodeKey,
  encodePrimitive,
  formatHeader,
 } from './primitives'
 import { LineWriter } from './writer'
 // #region Encode normalized JsonValue
--- a/src/encode/normalize.ts
+++ b/src/encode/normalize.ts
@@ -1,9 +1,4 @@
-import type {
+import type { JsonArray, JsonObject, JsonPrimitive, JsonValue } from '../types'
  JsonArray,
  JsonObject,
  JsonPrimitive,
  JsonValue,
 } from '../types'
 // #region Normalization (unknown → JsonValue)
--- a/src/encode/primitives.ts
+++ b/src/encode/primitives.ts
@@ -1,14 +1,7 @@
 import type { JsonPrimitive } from '../types'
-import {
+import { COMMA, DEFAULT_DELIMITER, DOUBLE_QUOTE, NULL_LITERAL } from '../constants'
-  BACKSLASH,
+import { escapeString } from '../shared/string-utils'
-  COMMA,
+import { isSafeUnquoted, isValidUnquotedKey } from '../shared/validation'
  DEFAULT_DELIMITER,
  DOUBLE_QUOTE,
  FALSE_LITERAL,
  LIST_ITEM_MARKER,
  NULL_LITERAL,
  TRUE_LITERAL,
 } from '../constants'
 // #region Primitive encoding
@@ -36,74 +29,6 @@ export function encodeStringLiteral(value: string, delimiter: string = COMMA): s
  return `${DOUBLE_QUOTE}${escapeString(value)}${DOUBLE_QUOTE}`
 }
 /**
 * Escapes special characters in a string for encoding.
 *
 * @remarks
 * Backslashes, double quotes, newlines, carriage returns and tabs are each
 * replaced by a `BACKSLASH`-prefixed sequence.
 *
 * @param value The raw text
 * @returns The escaped text
 */
 export function escapeString(value: string): string {
  // Applied strictly in this order, exactly like a chained `.replace()`
  const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
    [/\\/g, `${BACKSLASH}${BACKSLASH}`],
    [/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`],
    [/\n/g, `${BACKSLASH}n`],
    [/\r/g, `${BACKSLASH}r`],
    [/\t/g, `${BACKSLASH}t`],
  ]
  let escaped = value
  for (const [pattern, replacement] of substitutions) {
    escaped = escaped.replace(pattern, replacement)
  }
  return escaped
 }
 /**
 * Determines if a string value can be emitted without surrounding quotes.
 *
 * @param value The string to inspect
 * @param delimiter The active delimiter (defaults to `COMMA`)
 * @returns false as soon as any quoting condition below applies, true otherwise
 */
 export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
  // Empty or whitespace-padded
  if (value === '' || isPaddedWithWhitespace(value)) {
    return false
  }
  // Looks like a literal (boolean, null or number)
  const isKeyword = value === TRUE_LITERAL || value === FALSE_LITERAL || value === NULL_LITERAL
  if (isKeyword || isNumericLike(value)) {
    return false
  }
  // Colon (structural), quote and backslash (need escaping)
  const alwaysQuoted = [':', '"', '\\']
  if (alwaysQuoted.some(ch => value.includes(ch))) {
    return false
  }
  // Brackets and braces (structural), then newline, carriage return and tab
  if (/[[\]{}]/.test(value) || /[\n\r\t]/.test(value)) {
    return false
  }
  // Finally the active delimiter and a leading list marker
  return !(value.includes(delimiter) || value.startsWith(LIST_ITEM_MARKER))
 }
 /**
 * Checks if a string looks like a number.
 *
 * @remarks
 * Matches an optional sign, then either digits with an optional fraction
 * (`42`, `-3.14`, `05`, `5.`) or a bare fraction (`.5`), then an optional
 * exponent (`1e-6`). Forms such as `.5`, `5.` and `+5` are included because
 * `Number()` parses them, so leaving them unquoted would let them be read back
 * as numbers.
 *
 * @param value The string to test
 * @returns true if the whole string has numeric shape
 */
 function isNumericLike(value: string): boolean {
  return /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(value)
 }
 /**
 * Tells whether a string has leading or trailing whitespace.
 *
 * @param value The string to test
 * @returns true if trimming would shorten the string
 */
 function isPaddedWithWhitespace(value: string): boolean {
  const trimmedLength = value.trim().length
  return trimmedLength !== value.length
 }
 // #endregion
 // #region Key encoding
@@ -116,10 +41,6 @@ export function encodeKey(key: string): string {
  return `${DOUBLE_QUOTE}${escapeString(key)}${DOUBLE_QUOTE}`
 }
 /**
 * Checks if a key can be used without quotes.
 *
 * @remarks
 * The key must start with a letter or underscore and continue with word
 * characters (letters, digits, underscores) or dots only.
 *
 * @param key The key text
 * @returns true if the whole key matches that shape
 */
 function isValidUnquotedKey(key: string): boolean {
  const unquotedKeyPattern = /^[A-Z_][\w.]*$/i
  return unquotedKeyPattern.test(key)
 }
 // #endregion
 // #region Value joining
@@ -132,9 +53,6 @@ export function encodeAndJoinPrimitives(values: readonly JsonPrimitive[], delimi
 // #region Header formatters
 /**
 * Header formatter for arrays and tables with optional key prefix and field names
 */
 export function formatHeader(
  length: number,
  options?: {
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,10 +1,4 @@
-import type {
+import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
  DecodeOptions,
  EncodeOptions,
  JsonValue,
  ResolvedDecodeOptions,
  ResolvedEncodeOptions,
 } from './types'
 import { DEFAULT_DELIMITER } from './constants'
 import { decodeValueFromLines } from './decode/decoders'
 import { LineCursor, toParsedLines } from './decode/scanner'
--- a/src/shared/literal-utils.ts
+++ b/src/shared/literal-utils.ts
@@ -0,0 +1,28 @@
 import { FALSE_LITERAL, NULL_LITERAL, TRUE_LITERAL } from '../constants'
 /**
 * Reports whether a token is exactly one of the keyword literals
 * `TRUE_LITERAL`, `FALSE_LITERAL` or `NULL_LITERAL`.
 *
 * @param token The token text to classify
 * @returns true when the token equals one of the three literals
 */
 export function isBooleanOrNullLiteral(token: string): boolean {
  switch (token) {
    case TRUE_LITERAL:
    case FALSE_LITERAL:
    case NULL_LITERAL:
      return true
    default:
      return false
  }
 }
 /**
 * Checks if a token represents a valid numeric literal.
 *
 * @remarks
 * Blank tokens are rejected, as are integers written with leading zeros,
 * signed or not (`"05"`, `"-007"`). `"0"` itself and decimals such as `"0.5"`
 * or `"-0.5"` stay valid. Everything else is decided by `Number()`, and only
 * finite results count.
 *
 * @param token The raw token text
 * @returns true if the token should be treated as a number
 */
 export function isNumericLiteral(token: string): boolean {
  // `Number('')` and `Number('  ')` are both 0, so blank input must be rejected explicitly
  if (token.trim() === '')
    return false
  // Look past one optional sign so that `-05` is treated like `05`
  const unsigned = token[0] === '-' || token[0] === '+' ? token.slice(1) : token
  // Must not have leading zeros (except for `"0"` itself or decimals like `"0.5"`)
  if (unsigned.length > 1 && unsigned[0] === '0' && unsigned[1] !== '.')
    return false
  // Number.isFinite is false for NaN as well as for +/-Infinity
  return Number.isFinite(Number(token))
 }
--- a/src/shared/string-utils.ts
+++ b/src/shared/string-utils.ts
@@ -0,0 +1,127 @@
 import { BACKSLASH, CARRIAGE_RETURN, DOUBLE_QUOTE, NEWLINE, TAB } from '../constants'
 /**
 * Escapes special characters in a string for encoding.
 *
 * @remarks
 * Backslashes, double quotes, newlines, carriage returns and tabs are each
 * replaced by a `BACKSLASH`-prefixed sequence.
 *
 * @param value The raw text
 * @returns The escaped text
 */
 export function escapeString(value: string): string {
  // Applied strictly in this order, exactly like a chained `.replace()`
  const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
    [/\\/g, `${BACKSLASH}${BACKSLASH}`],
    [/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`],
    [/\n/g, `${BACKSLASH}n`],
    [/\r/g, `${BACKSLASH}r`],
    [/\t/g, `${BACKSLASH}t`],
  ]
  let escaped = value
  for (const [pattern, replacement] of substitutions) {
    escaped = escaped.replace(pattern, replacement)
  }
  return escaped
 }
 /**
 * Decodes backslash escape sequences in `value`.
 *
 * @remarks
 * A `BACKSLASH` followed by `n`, `t`, `r`, `BACKSLASH` or `DOUBLE_QUOTE` becomes
 * `NEWLINE`, `TAB`, `CARRIAGE_RETURN`, `BACKSLASH` or `DOUBLE_QUOTE` respectively.
 *
 * @param value The escaped text
 * @returns The text with every escape sequence replaced
 * @throws SyntaxError if a backslash ends the string or precedes an unsupported character
 */
 export function unescapeString(value: string): string {
  let decoded = ''
  for (let pos = 0; pos < value.length; pos++) {
    const current = value[pos]
    if (current !== BACKSLASH) {
      decoded += current
      continue
    }
    if (pos + 1 >= value.length) {
      throw new SyntaxError('Invalid escape sequence: backslash at end of string')
    }
    // Step onto the escaped character; the loop increment then moves past it
    pos++
    const escaped = value[pos]
    switch (escaped) {
      case 'n':
        decoded += NEWLINE
        break
      case 't':
        decoded += TAB
        break
      case 'r':
        decoded += CARRIAGE_RETURN
        break
      case BACKSLASH:
        decoded += BACKSLASH
        break
      case DOUBLE_QUOTE:
        decoded += DOUBLE_QUOTE
        break
      default:
        throw new SyntaxError(`Invalid escape sequence: \\${escaped}`)
    }
  }
  return decoded
 }
 /**
 * Locates the double quote that closes a quoted section, honoring backslash escapes.
 *
 * @param content The string to search in
 * @param start The index of the opening quote; scanning begins one past it
 * @returns The index of the closing quote, or -1 if there is none
 */
 export function findClosingQuote(content: string, start: number): number {
  for (let pos = start + 1; pos < content.length; pos++) {
    const current = content[pos]
    if (current === BACKSLASH && pos + 1 < content.length) {
      // Jump over the escaped character (the loop increment supplies the second step)
      pos++
      continue
    }
    if (current === DOUBLE_QUOTE) {
      return pos
    }
  }
  // Ran off the end without meeting an unescaped quote
  return -1
 }
 /**
 * Returns the position of the first occurrence of `char` that lies outside double quotes.
 *
 * @remarks
 * Backslash escapes are only honored while inside a quoted section.
 *
 * @param content The string to search in
 * @param char The character to look for
 * @param start Optional starting index (defaults to 0)
 * @returns The index of the character, or -1 if it does not occur outside quotes
 */
 export function findUnquotedChar(content: string, char: string, start = 0): number {
  let quoted = false
  for (let pos = start; pos < content.length; pos++) {
    const current = content[pos]
    if (quoted && current === BACKSLASH && pos + 1 < content.length) {
      // Jump over the escaped character (the loop increment supplies the second step)
      pos++
      continue
    }
    if (current === DOUBLE_QUOTE) {
      quoted = !quoted
      continue
    }
    if (!quoted && current === char) {
      return pos
    }
  }
  return -1
 }
--- a/src/shared/validation.ts
+++ b/src/shared/validation.ts
@@ -0,0 +1,84 @@
 import { COMMA, LIST_ITEM_MARKER } from '../constants'
 import { isBooleanOrNullLiteral } from './literal-utils'
 /**
 * Checks if a key can be used without quotes.
 *
 * @remarks
 * The key must start with a letter or underscore and continue with word
 * characters (letters, digits, underscores) or dots only.
 *
 * @param key The key text
 * @returns true if the whole key matches that shape
 */
 export function isValidUnquotedKey(key: string): boolean {
  const unquotedKeyPattern = /^[A-Z_][\w.]*$/i
  return unquotedKeyPattern.test(key)
 }
 /**
 * Determines if a string value can be safely encoded without quotes.
 *
 * @remarks
 * Quoting is required when the value:
 * - is empty, or has leading or trailing whitespace
 * - looks like a literal (boolean, null, number)
 * - contains a colon, bracket or brace (structural characters)
 * - contains a double quote or backslash (need escaping)
 * - contains a newline, carriage return or tab
 * - contains the active delimiter
 * - starts with `LIST_ITEM_MARKER`
 *
 * @param value The string to inspect
 * @param delimiter The active delimiter (defaults to `COMMA`)
 * @returns true only if none of the conditions above applies
 */
 export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
  // Empty or whitespace-padded
  if (value === '' || value.trim() !== value) {
    return false
  }
  // Looks like a literal (boolean, null or number)
  if (isBooleanOrNullLiteral(value) || isNumericLike(value)) {
    return false
  }
  // Colon (structural), quote and backslash (need escaping)
  const alwaysQuoted = [':', '"', '\\']
  if (alwaysQuoted.some(ch => value.includes(ch))) {
    return false
  }
  // Brackets and braces (structural), then newline, carriage return and tab
  if (/[[\]{}]/.test(value) || /[\n\r\t]/.test(value)) {
    return false
  }
  // Finally the active delimiter and a leading list marker
  return !(value.includes(delimiter) || value.startsWith(LIST_ITEM_MARKER))
 }
 /**
 * Checks if a string looks like a number.
 *
 * @remarks
 * Matches an optional sign, then either digits with an optional fraction
 * (`42`, `-3.14`, `05`, `5.`) or a bare fraction (`.5`), then an optional
 * exponent (`1e-6`). Forms such as `.5`, `5.` and `+5` are included because
 * `Number()` parses them, so leaving them unquoted would let them be read back
 * as numbers.
 *
 * @param value The string to test
 * @returns true if the whole string has numeric shape
 */
 function isNumericLike(value: string): boolean {
  return /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(value)
 }
--- a/test/decode.test.ts
+++ b/test/decode.test.ts
@@ -32,6 +32,14 @@ describe('primitives', () => {
    expect(decode('null')).toBe(null)
  })
  it('treats unquoted invalid numeric formats as strings', () => {
    expect(decode('05')).toBe('05')
    expect(decode('007')).toBe('007')
    expect(decode('0123')).toBe('0123')
    expect(decode('a: 05')).toEqual({ a: '05' })
    expect(decode('nums[3]: 05,007,0123')).toEqual({ nums: ['05', '007', '0123'] })
  })
  it('respects ambiguity quoting (quoted primitives remain strings)', () => {
    expect(decode('"true"')).toBe('true')
    expect(decode('"false"')).toBe('false')