test: add case for unquoted invalid numeric formats as strings

2026-01-29 23:34:10 +08:00 · 2025-10-29 13:05:42 +01:00
parent b034c4455e
commit ee31be3bdc
12 changed files with 292 additions and 364 deletions
--- a/src/decode/decoders.ts
+++ b/src/decode/decoders.ts
@@ -1,35 +1,9 @@
-import type {
-  ArrayHeaderInfo,
-  Delimiter,
-  Depth,
-  JsonArray,
-  JsonObject,
-  JsonPrimitive,
-  JsonValue,
-  ParsedLine,
-  ResolvedDecodeOptions,
-} from '../types'
+import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
 import type { LineCursor } from './scanner'
-import {
-  COLON,
-  DEFAULT_DELIMITER,
-  LIST_ITEM_PREFIX,
-} from '../constants'
-import {
-  isArrayHeaderAfterHyphen,
-  isObjectFirstFieldAfterHyphen,
-  mapRowValuesToPrimitives,
-  parseArrayHeaderLine,
-  parseDelimitedValues,
-  parseKeyToken,
-  parsePrimitiveToken,
-} from './parser'
-import { findClosingQuote } from './utils'
-import {
-  assertExpectedCount,
-  validateNoExtraListItems,
-  validateNoExtraTabularRows,
-} from './validation'
+import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
+import { findClosingQuote } from '../shared/string-utils'
+import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
+import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'

 // #region Entry decoding

--- a/src/decode/parser.ts
+++ b/src/decode/parser.ts
@@ -1,27 +1,7 @@
-import type {
-  ArrayHeaderInfo,
-  Delimiter,
-  JsonPrimitive,
-} from '../types'
-import {
-  BACKSLASH,
-  CARRIAGE_RETURN,
-  CLOSE_BRACE,
-  CLOSE_BRACKET,
-  COLON,
-  DELIMITERS,
-  DOUBLE_QUOTE,
-  FALSE_LITERAL,
-  HASH,
-  NEWLINE,
-  NULL_LITERAL,
-  OPEN_BRACE,
-  OPEN_BRACKET,
-  PIPE,
-  TAB,
-  TRUE_LITERAL,
-} from '../constants'
-import { findClosingQuote, hasUnquotedChar } from './utils'
+import type { ArrayHeaderInfo, Delimiter, JsonPrimitive } from '../types'
+import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, HASH, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
+import { isBooleanOrNullLiteral, isNumericLiteral } from '../shared/literal-utils'
+import { findClosingQuote, findUnquotedChar, unescapeString } from '../shared/string-utils'

 // #region Array header parsing

@@ -224,24 +204,6 @@ export function parsePrimitiveToken(token: string): JsonPrimitive {
  return trimmed
 }

-export function isBooleanOrNullLiteral(token: string): boolean {
-  return token === TRUE_LITERAL || token === FALSE_LITERAL || token === NULL_LITERAL
-}
-
-export function isNumericLiteral(token: string): boolean {
-  if (!token)
-    return false
-
-  // Must not have leading zeros (except for "0" itself or decimals like "0.5")
-  if (token.length > 1 && token[0] === '0' && token[1] !== '.') {
-    return false
-  }
-
-  // Check if it's a valid number
-  const num = Number(token)
-  return !Number.isNaN(num) && Number.isFinite(num)
-}
-
 export function parseStringLiteral(token: string): string {
  const trimmed = token.trim()

@@ -265,53 +227,6 @@ export function parseStringLiteral(token: string): string {
  return trimmed
 }

-export function unescapeString(value: string): string {
-  let result = ''
-  let i = 0
-
-  while (i < value.length) {
-    if (value[i] === BACKSLASH) {
-      if (i + 1 >= value.length) {
-        throw new SyntaxError('Invalid escape sequence: backslash at end of string')
-      }
-
-      const next = value[i + 1]
-      if (next === 'n') {
-        result += NEWLINE
-        i += 2
-        continue
-      }
-      if (next === 't') {
-        result += TAB
-        i += 2
-        continue
-      }
-      if (next === 'r') {
-        result += CARRIAGE_RETURN
-        i += 2
-        continue
-      }
-      if (next === BACKSLASH) {
-        result += BACKSLASH
-        i += 2
-        continue
-      }
-      if (next === DOUBLE_QUOTE) {
-        result += DOUBLE_QUOTE
-        i += 2
-        continue
-      }
-
-      throw new SyntaxError(`Invalid escape sequence: \\${next}`)
-    }
-
-    result += value[i]
-    i++
-  }
-
-  return result
-}
-
 export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
  let end = start
  while (end < content.length && content[end] !== COLON) {
@@ -367,11 +282,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
 // #region Array content detection helpers

 export function isArrayHeaderAfterHyphen(content: string): boolean {
-  return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON)
+  return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1
 }

 export function isObjectFirstFieldAfterHyphen(content: string): boolean {
-  return hasUnquotedChar(content, COLON)
+  return findUnquotedChar(content, COLON) !== -1
 }

 // #endregion
--- a/src/decode/utils.ts
+++ b/src/decode/utils.ts
@@ -1,96 +0,0 @@
-import { BACKSLASH, DOUBLE_QUOTE } from '../constants'
-
-/**
- * Finds the index of the closing double quote in a string, accounting for escape sequences.
- *
- * @param content The string to search in
- * @param start The index of the opening quote
- * @returns The index of the closing quote, or -1 if not found
- */
-export function findClosingQuote(content: string, start: number): number {
-  let i = start + 1
-  while (i < content.length) {
-    if (content[i] === BACKSLASH && i + 1 < content.length) {
-      // Skip escaped character
-      i += 2
-      continue
-    }
-    if (content[i] === DOUBLE_QUOTE) {
-      return i
-    }
-    i++
-  }
-  return -1 // Not found
-}
-
-/**
- * Checks if a string contains a specific character outside of quoted sections.
- *
- * @param content The string to check
- * @param char The character to look for
- * @returns true if the character exists outside quotes, false otherwise
- */
-export function hasUnquotedChar(content: string, char: string): boolean {
-  return findUnquotedChar(content, char) !== -1
-}
-
-/**
- * Finds the index of a specific character outside of quoted sections.
- *
- * @param content The string to search in
- * @param char The character to look for
- * @param start Optional starting index (defaults to 0)
- * @returns The index of the character, or -1 if not found outside quotes
- */
-export function findUnquotedChar(content: string, char: string, start = 0): number {
-  let inQuotes = false
-  let i = start
-
-  while (i < content.length) {
-    if (content[i] === BACKSLASH && i + 1 < content.length && inQuotes) {
-      // Skip escaped character
-      i += 2
-      continue
-    }
-
-    if (content[i] === DOUBLE_QUOTE) {
-      inQuotes = !inQuotes
-      i++
-      continue
-    }
-
-    if (content[i] === char && !inQuotes) {
-      return i
-    }
-
-    i++
-  }
-
-  return -1
-}
-
-/**
- * Checks if a string starts and ends with double quotes.
- *
- * @param content The string to check
- * @returns true if the string is quoted, false otherwise
- */
-export function isQuotedString(content: string): boolean {
-  const trimmed = content.trim()
-  return trimmed.startsWith(DOUBLE_QUOTE) && trimmed.endsWith(DOUBLE_QUOTE) && trimmed.length >= 2
-}
-
-/**
- * Skips whitespace characters starting from a given index.
- *
- * @param content The string to process
- * @param start The starting index
- * @returns The index of the first non-whitespace character, or content.length if all whitespace
- */
-export function skipWhitespace(content: string, start: number): number {
-  let i = start
-  while (i < content.length && /\s/.test(content[i]!)) {
-    i++
-  }
-  return i
-}
--- a/src/decode/validation.ts
+++ b/src/decode/validation.ts
@@ -7,7 +7,7 @@ import { COLON, LIST_ITEM_PREFIX } from '../constants'
 *
 * @param actual The actual count
 * @param expected The expected count
- * @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows')
+ * @param itemType The type of items being counted (e.g., `list array items`, `tabular rows`)
 * @param options Decode options
 * @throws RangeError if counts don't match in strict mode
 */
@@ -44,31 +44,6 @@ export function validateNoExtraListItems(
  }
 }

-/**
- * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
- *
- * @param content The line content
- * @param delimiter The delimiter used in the table
- * @returns true if the line is a data row, false if it's a key-value pair
- */
-export function isDataRow(content: string, delimiter: Delimiter): boolean {
-  const colonPos = content.indexOf(COLON)
-  const delimiterPos = content.indexOf(delimiter)
-
-  // No colon = definitely a data row
-  if (colonPos === -1) {
-    return true
-  }
-
-  // Has delimiter and it comes before colon = data row
-  if (delimiterPos !== -1 && delimiterPos < colonPos) {
-    return true
-  }
-
-  // Colon before delimiter or no delimiter = key-value pair
-  return false
-}
-
 /**
 * Validates that there are no extra tabular rows beyond the expected count.
 *
@@ -95,3 +70,28 @@ export function validateNoExtraTabularRows(
    throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
  }
 }
+
+/**
+ * Heuristically decides whether a line in a tabular array is a data row rather than a key-value pair, by comparing the positions of the first colon and the first delimiter.
+ *
+ * @param content The line content; it is searched with plain `indexOf`, so colons or delimiters inside quoted values are not treated specially
+ * @param delimiter The delimiter used in the table
+ * @returns true if the line has no colon or a delimiter occurs before the first colon; false otherwise (treated as a key-value pair)
+ */
+function isDataRow(content: string, delimiter: Delimiter): boolean {
+  const colonPos = content.indexOf(COLON)
+  const delimiterPos = content.indexOf(delimiter)
+
+  // No colon at all: the line cannot be a key-value pair, so it is a data row
+  if (colonPos === -1) {
+    return true
+  }
+
+  // A delimiter appears before the first colon: the colon belongs to a cell value, so it is a data row
+  if (delimiterPos !== -1 && delimiterPos < colonPos) {
+    return true
+  }
+
+  // The colon comes first (or there is no delimiter): treat the line as a key-value pair
+  return false
+}
--- a/src/encode/encoders.ts
+++ b/src/encode/encoders.ts
@@ -1,26 +1,7 @@
-import type {
-  Depth,
-  JsonArray,
-  JsonObject,
-  JsonPrimitive,
-  JsonValue,
-  ResolvedEncodeOptions,
-} from '../types'
+import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types'
 import { LIST_ITEM_MARKER } from '../constants'
-import {
-  isArrayOfArrays,
-  isArrayOfObjects,
-  isArrayOfPrimitives,
-  isJsonArray,
-  isJsonObject,
-  isJsonPrimitive,
-} from './normalize'
-import {
-  encodeAndJoinPrimitives,
-  encodeKey,
-  encodePrimitive,
-  formatHeader,
-} from './primitives'
+import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize'
+import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives'
 import { LineWriter } from './writer'

 // #region Encode normalized JsonValue
--- a/src/encode/normalize.ts
+++ b/src/encode/normalize.ts
@@ -1,9 +1,4 @@
-import type {
-  JsonArray,
-  JsonObject,
-  JsonPrimitive,
-  JsonValue,
-} from '../types'
+import type { JsonArray, JsonObject, JsonPrimitive, JsonValue } from '../types'

 // #region Normalization (unknown → JsonValue)

--- a/src/encode/primitives.ts
+++ b/src/encode/primitives.ts
@@ -1,14 +1,7 @@
 import type { JsonPrimitive } from '../types'
-import {
-  BACKSLASH,
-  COMMA,
-  DEFAULT_DELIMITER,
-  DOUBLE_QUOTE,
-  FALSE_LITERAL,
-  LIST_ITEM_MARKER,
-  NULL_LITERAL,
-  TRUE_LITERAL,
-} from '../constants'
+import { COMMA, DEFAULT_DELIMITER, DOUBLE_QUOTE, NULL_LITERAL } from '../constants'
+import { escapeString } from '../shared/string-utils'
+import { isSafeUnquoted, isValidUnquotedKey } from '../shared/validation'

 // #region Primitive encoding

@@ -36,74 +29,6 @@ export function encodeStringLiteral(value: string, delimiter: string = COMMA): s
  return `${DOUBLE_QUOTE}${escapeString(value)}${DOUBLE_QUOTE}`
 }

-export function escapeString(value: string): string {
-  return value
-    .replace(/\\/g, `${BACKSLASH}${BACKSLASH}`)
-    .replace(/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`)
-    .replace(/\n/g, `${BACKSLASH}n`)
-    .replace(/\r/g, `${BACKSLASH}r`)
-    .replace(/\t/g, `${BACKSLASH}t`)
-}
-
-export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
-  if (!value) {
-    return false
-  }
-
-  if (isPaddedWithWhitespace(value)) {
-    return false
-  }
-
-  if (value === TRUE_LITERAL || value === FALSE_LITERAL || value === NULL_LITERAL) {
-    return false
-  }
-
-  if (isNumericLike(value)) {
-    return false
-  }
-
-  // Check for colon (always structural)
-  if (value.includes(':')) {
-    return false
-  }
-
-  // Check for quotes and backslash (always need escaping)
-  if (value.includes('"') || value.includes('\\')) {
-    return false
-  }
-
-  // Check for brackets and braces (always structural)
-  if (/[[\]{}]/.test(value)) {
-    return false
-  }
-
-  // Check for control characters (newline, carriage return, tab - always need quoting/escaping)
-  if (/[\n\r\t]/.test(value)) {
-    return false
-  }
-
-  // Check for the active delimiter
-  if (value.includes(delimiter)) {
-    return false
-  }
-
-  // Check for hyphen at start (list marker)
-  if (value.startsWith(LIST_ITEM_MARKER)) {
-    return false
-  }
-
-  return true
-}
-
-function isNumericLike(value: string): boolean {
-  // Match numbers like: 42, -3.14, 1e-6, 05, etc.
-  return /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i.test(value) || /^0\d+$/.test(value)
-}
-
-function isPaddedWithWhitespace(value: string): boolean {
-  return value !== value.trim()
-}
-
 // #endregion

 // #region Key encoding
@@ -116,10 +41,6 @@ export function encodeKey(key: string): string {
  return `${DOUBLE_QUOTE}${escapeString(key)}${DOUBLE_QUOTE}`
 }

-function isValidUnquotedKey(key: string): boolean {
-  return /^[A-Z_][\w.]*$/i.test(key)
-}
-
 // #endregion

 // #region Value joining
@@ -132,9 +53,6 @@ export function encodeAndJoinPrimitives(values: readonly JsonPrimitive[], delimi

 // #region Header formatters

-/**
- * Header formatter for arrays and tables with optional key prefix and field names
- */
 export function formatHeader(
  length: number,
  options?: {
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,10 +1,4 @@
-import type {
-  DecodeOptions,
-  EncodeOptions,
-  JsonValue,
-  ResolvedDecodeOptions,
-  ResolvedEncodeOptions,
-} from './types'
+import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
 import { DEFAULT_DELIMITER } from './constants'
 import { decodeValueFromLines } from './decode/decoders'
 import { LineCursor, toParsedLines } from './decode/scanner'
--- a/src/shared/literal-utils.ts
+++ b/src/shared/literal-utils.ts
@@ -0,0 +1,28 @@
+import { FALSE_LITERAL, NULL_LITERAL, TRUE_LITERAL } from '../constants'
+
+/**
+ * Checks if a token is exactly one of the boolean or null literals (`true`, `false`, `null`); the comparison is strict, so surrounding whitespace or different casing does not match.
+ */
+export function isBooleanOrNullLiteral(token: string): boolean {
+  return token === TRUE_LITERAL || token === FALSE_LITERAL || token === NULL_LITERAL
+}
+
+/**
+ * Checks if a token represents a valid numeric literal, i.e. it is non-empty and `Number(token)` yields a finite value.
+ *
+ * @remarks
+ * Rejects tokens whose first character is `0` followed by anything other than `.` (so `"0"` and `"0.5"` pass, `"05"` does not). NOTE(review): only index 0 is inspected, so a signed token such as `"-05"` is not rejected by this check — confirm that is intended.
+ */
+export function isNumericLiteral(token: string): boolean {
+  if (!token)
+    return false
+
+  // Must not have leading zeros (except for `"0"` itself or decimals like `"0.5"`); this only looks at the first two characters
+  if (token.length > 1 && token[0] === '0' && token[1] !== '.') {
+    return false
+  }
+
+  // Accept only tokens that `Number()` converts to a finite value (rejects NaN and ±Infinity)
+  const num = Number(token)
+  return !Number.isNaN(num) && Number.isFinite(num)
+}
--- a/src/shared/string-utils.ts
+++ b/src/shared/string-utils.ts
@@ -0,0 +1,127 @@
+import { BACKSLASH, CARRIAGE_RETURN, DOUBLE_QUOTE, NEWLINE, TAB } from '../constants'
+
+/**
+ * Escapes special characters in a string for encoding.
+ *
+ * @remarks
+ * Handles backslashes, quotes, newlines, carriage returns, and tabs. Backslashes are replaced first so that the backslashes introduced by the later replacements are not escaped a second time.
+ */
+export function escapeString(value: string): string {
+  return value
+    .replace(/\\/g, `${BACKSLASH}${BACKSLASH}`)
+    .replace(/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`)
+    .replace(/\n/g, `${BACKSLASH}n`)
+    .replace(/\r/g, `${BACKSLASH}r`)
+    .replace(/\t/g, `${BACKSLASH}t`)
+}
+
+/**
+ * Unescapes a string by processing escape sequences in a single left-to-right pass.
+ *
+ * @remarks
+ * Handles `\n`, `\t`, `\r`, `\\`, and `\"` escape sequences; throws `SyntaxError` for a backslash at the end of the string or followed by any other character.
+ */
+export function unescapeString(value: string): string {
+  let result = ''
+  let i = 0
+
+  while (i < value.length) {
+    if (value[i] === BACKSLASH) {
+      if (i + 1 >= value.length) {
+        throw new SyntaxError('Invalid escape sequence: backslash at end of string')
+      }
+
+      const next = value[i + 1]
+      if (next === 'n') {
+        result += NEWLINE
+        i += 2
+        continue
+      }
+      if (next === 't') {
+        result += TAB
+        i += 2
+        continue
+      }
+      if (next === 'r') {
+        result += CARRIAGE_RETURN
+        i += 2
+        continue
+      }
+      if (next === BACKSLASH) {
+        result += BACKSLASH
+        i += 2
+        continue
+      }
+      if (next === DOUBLE_QUOTE) {
+        result += DOUBLE_QUOTE
+        i += 2
+        continue
+      }
+
+      throw new SyntaxError(`Invalid escape sequence: \\${next}`)
+    }
+
+    result += value[i]
+    i++
+  }
+
+  return result
+}
+
+/**
+ * Finds the index of the closing double quote in a string, accounting for escape sequences.
+ *
+ * @param content The string to search in
+ * @param start The index of the opening quote; scanning begins at the character after it
+ * @returns The index of the first double quote that is not skipped as part of a backslash escape, or -1 if not found
+ */
+export function findClosingQuote(content: string, start: number): number {
+  let i = start + 1
+  while (i < content.length) {
+    if (content[i] === BACKSLASH && i + 1 < content.length) {
+      // Skip the backslash together with the character it escapes
+      i += 2
+      continue
+    }
+    if (content[i] === DOUBLE_QUOTE) {
+      return i
+    }
+    i++
+  }
+  return -1 // Not found
+}
+
+/**
+ * Finds the index of a specific character outside of quoted sections, toggling the quote state on each double quote encountered.
+ *
+ * @param content The string to search in
+ * @param char The character to look for
+ * @param start Optional starting index (defaults to 0); the scan always begins in the "outside quotes" state, so this position should not lie inside a quoted section
+ * @returns The index of the character, or -1 if not found outside quotes
+ */
+export function findUnquotedChar(content: string, char: string, start = 0): number {
+  let inQuotes = false
+  let i = start
+
+  while (i < content.length) {
+    if (content[i] === BACKSLASH && i + 1 < content.length && inQuotes) {
+      // Only inside quotes does a backslash escape the next character: skip both
+      i += 2
+      continue
+    }
+
+    if (content[i] === DOUBLE_QUOTE) {
+      inQuotes = !inQuotes
+      i++
+      continue
+    }
+
+    if (content[i] === char && !inQuotes) {
+      return i
+    }
+
+    i++
+  }
+
+  return -1
+}
--- a/src/shared/validation.ts
+++ b/src/shared/validation.ts
@@ -0,0 +1,84 @@
+import { COMMA, LIST_ITEM_MARKER } from '../constants'
+import { isBooleanOrNullLiteral } from './literal-utils'
+
+/**
+ * Checks if a key can be used without quotes.
+ *
+ * @remarks
+ * Valid unquoted keys must start with an ASCII letter or underscore,
+ * followed by any number of ASCII letters, digits, underscores, or dots; an empty key is not valid.
+ */
+export function isValidUnquotedKey(key: string): boolean {
+  return /^[A-Z_][\w.]*$/i.test(key)
+}
+
+/**
+ * Determines if a string value can be safely encoded without quotes for the given delimiter (`COMMA` when omitted).
+ *
+ * @remarks
+ * A string needs quoting if it:
+ * - Is empty
+ * - Has leading or trailing whitespace
+ * - Could be confused with a literal (boolean, null, or a number-like token, including zero-padded digits such as `05`)
+ * - Contains structural characters (colons, brackets, braces)
+ * - Contains quotes or backslashes (need escaping)
+ * - Contains a newline, carriage return, or tab
+ * - Contains the active delimiter
+ * - Starts with a list marker (hyphen)
+ */
+export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
+  if (!value) {
+    return false
+  }
+
+  if (value !== value.trim()) {
+    return false
+  }
+
+  // Check if it looks like any literal value (boolean, null, or numeric)
+  if (isBooleanOrNullLiteral(value) || isNumericLike(value)) {
+    return false
+  }
+
+  // Check for colon (always structural)
+  if (value.includes(':')) {
+    return false
+  }
+
+  // Check for quotes and backslash (always need escaping)
+  if (value.includes('"') || value.includes('\\')) {
+    return false
+  }
+
+  // Check for brackets and braces (always structural)
+  if (/[[\]{}]/.test(value)) {
+    return false
+  }
+
+  // Check for control characters (newline, carriage return, tab - always need quoting/escaping)
+  if (/[\n\r\t]/.test(value)) {
+    return false
+  }
+
+  // Check for the active delimiter
+  if (value.includes(delimiter)) {
+    return false
+  }
+
+  // Check for hyphen at start (list marker)
+  if (value.startsWith(LIST_ITEM_MARKER)) {
+    return false
+  }
+
+  return true
+}
+
+/**
+ * Checks if a string looks like a number.
+ *
+ * @remarks
+ * Matches an optional minus sign, digits, an optional fraction and an optional exponent (case-insensitive `e`), e.g. `42`, `-3.14`, `1e-6`, `05`. NOTE(review): the second pattern (`0` followed by digits) appears to be already covered by the first — confirm before removing.
+ */
+function isNumericLike(value: string): boolean {
+  return /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i.test(value) || /^0\d+$/.test(value)
+}