mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
test: add case for unquoted invalid numeric formats as strings
This commit is contained in:
@@ -1,35 +1,9 @@
|
||||
import type {
|
||||
ArrayHeaderInfo,
|
||||
Delimiter,
|
||||
Depth,
|
||||
JsonArray,
|
||||
JsonObject,
|
||||
JsonPrimitive,
|
||||
JsonValue,
|
||||
ParsedLine,
|
||||
ResolvedDecodeOptions,
|
||||
} from '../types'
|
||||
import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
|
||||
import type { LineCursor } from './scanner'
|
||||
import {
|
||||
COLON,
|
||||
DEFAULT_DELIMITER,
|
||||
LIST_ITEM_PREFIX,
|
||||
} from '../constants'
|
||||
import {
|
||||
isArrayHeaderAfterHyphen,
|
||||
isObjectFirstFieldAfterHyphen,
|
||||
mapRowValuesToPrimitives,
|
||||
parseArrayHeaderLine,
|
||||
parseDelimitedValues,
|
||||
parseKeyToken,
|
||||
parsePrimitiveToken,
|
||||
} from './parser'
|
||||
import { findClosingQuote } from './utils'
|
||||
import {
|
||||
assertExpectedCount,
|
||||
validateNoExtraListItems,
|
||||
validateNoExtraTabularRows,
|
||||
} from './validation'
|
||||
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
|
||||
import { findClosingQuote } from '../shared/string-utils'
|
||||
import { isArrayHeaderAfterHyphen, isObjectFirstFieldAfterHyphen, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser'
|
||||
import { assertExpectedCount, validateNoExtraListItems, validateNoExtraTabularRows } from './validation'
|
||||
|
||||
// #region Entry decoding
|
||||
|
||||
|
||||
@@ -1,27 +1,7 @@
|
||||
import type {
|
||||
ArrayHeaderInfo,
|
||||
Delimiter,
|
||||
JsonPrimitive,
|
||||
} from '../types'
|
||||
import {
|
||||
BACKSLASH,
|
||||
CARRIAGE_RETURN,
|
||||
CLOSE_BRACE,
|
||||
CLOSE_BRACKET,
|
||||
COLON,
|
||||
DELIMITERS,
|
||||
DOUBLE_QUOTE,
|
||||
FALSE_LITERAL,
|
||||
HASH,
|
||||
NEWLINE,
|
||||
NULL_LITERAL,
|
||||
OPEN_BRACE,
|
||||
OPEN_BRACKET,
|
||||
PIPE,
|
||||
TAB,
|
||||
TRUE_LITERAL,
|
||||
} from '../constants'
|
||||
import { findClosingQuote, hasUnquotedChar } from './utils'
|
||||
import type { ArrayHeaderInfo, Delimiter, JsonPrimitive } from '../types'
|
||||
import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, HASH, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
|
||||
import { isBooleanOrNullLiteral, isNumericLiteral } from '../shared/literal-utils'
|
||||
import { findClosingQuote, findUnquotedChar, unescapeString } from '../shared/string-utils'
|
||||
|
||||
// #region Array header parsing
|
||||
|
||||
@@ -224,24 +204,6 @@ export function parsePrimitiveToken(token: string): JsonPrimitive {
|
||||
return trimmed
|
||||
}
|
||||
|
||||
export function isBooleanOrNullLiteral(token: string): boolean {
|
||||
return token === TRUE_LITERAL || token === FALSE_LITERAL || token === NULL_LITERAL
|
||||
}
|
||||
|
||||
export function isNumericLiteral(token: string): boolean {
|
||||
if (!token)
|
||||
return false
|
||||
|
||||
// Must not have leading zeros (except for "0" itself or decimals like "0.5")
|
||||
if (token.length > 1 && token[0] === '0' && token[1] !== '.') {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check if it's a valid number
|
||||
const num = Number(token)
|
||||
return !Number.isNaN(num) && Number.isFinite(num)
|
||||
}
|
||||
|
||||
export function parseStringLiteral(token: string): string {
|
||||
const trimmed = token.trim()
|
||||
|
||||
@@ -265,53 +227,6 @@ export function parseStringLiteral(token: string): string {
|
||||
return trimmed
|
||||
}
|
||||
|
||||
export function unescapeString(value: string): string {
|
||||
let result = ''
|
||||
let i = 0
|
||||
|
||||
while (i < value.length) {
|
||||
if (value[i] === BACKSLASH) {
|
||||
if (i + 1 >= value.length) {
|
||||
throw new SyntaxError('Invalid escape sequence: backslash at end of string')
|
||||
}
|
||||
|
||||
const next = value[i + 1]
|
||||
if (next === 'n') {
|
||||
result += NEWLINE
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
if (next === 't') {
|
||||
result += TAB
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
if (next === 'r') {
|
||||
result += CARRIAGE_RETURN
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
if (next === BACKSLASH) {
|
||||
result += BACKSLASH
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
if (next === DOUBLE_QUOTE) {
|
||||
result += DOUBLE_QUOTE
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
|
||||
throw new SyntaxError(`Invalid escape sequence: \\${next}`)
|
||||
}
|
||||
|
||||
result += value[i]
|
||||
i++
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
|
||||
let end = start
|
||||
while (end < content.length && content[end] !== COLON) {
|
||||
@@ -367,11 +282,11 @@ export function parseKeyToken(content: string, start: number): { key: string, en
|
||||
// #region Array content detection helpers
|
||||
|
||||
export function isArrayHeaderAfterHyphen(content: string): boolean {
|
||||
return content.trim().startsWith(OPEN_BRACKET) && hasUnquotedChar(content, COLON)
|
||||
return content.trim().startsWith(OPEN_BRACKET) && findUnquotedChar(content, COLON) !== -1
|
||||
}
|
||||
|
||||
export function isObjectFirstFieldAfterHyphen(content: string): boolean {
|
||||
return hasUnquotedChar(content, COLON)
|
||||
return findUnquotedChar(content, COLON) !== -1
|
||||
}
|
||||
|
||||
// #endregion
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
import { BACKSLASH, DOUBLE_QUOTE } from '../constants'
|
||||
|
||||
/**
|
||||
* Finds the index of the closing double quote in a string, accounting for escape sequences.
|
||||
*
|
||||
* @param content The string to search in
|
||||
* @param start The index of the opening quote
|
||||
* @returns The index of the closing quote, or -1 if not found
|
||||
*/
|
||||
export function findClosingQuote(content: string, start: number): number {
|
||||
let i = start + 1
|
||||
while (i < content.length) {
|
||||
if (content[i] === BACKSLASH && i + 1 < content.length) {
|
||||
// Skip escaped character
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
if (content[i] === DOUBLE_QUOTE) {
|
||||
return i
|
||||
}
|
||||
i++
|
||||
}
|
||||
return -1 // Not found
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a string contains a specific character outside of quoted sections.
|
||||
*
|
||||
* @param content The string to check
|
||||
* @param char The character to look for
|
||||
* @returns true if the character exists outside quotes, false otherwise
|
||||
*/
|
||||
export function hasUnquotedChar(content: string, char: string): boolean {
|
||||
return findUnquotedChar(content, char) !== -1
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the index of a specific character outside of quoted sections.
|
||||
*
|
||||
* @param content The string to search in
|
||||
* @param char The character to look for
|
||||
* @param start Optional starting index (defaults to 0)
|
||||
* @returns The index of the character, or -1 if not found outside quotes
|
||||
*/
|
||||
export function findUnquotedChar(content: string, char: string, start = 0): number {
|
||||
let inQuotes = false
|
||||
let i = start
|
||||
|
||||
while (i < content.length) {
|
||||
if (content[i] === BACKSLASH && i + 1 < content.length && inQuotes) {
|
||||
// Skip escaped character
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
|
||||
if (content[i] === DOUBLE_QUOTE) {
|
||||
inQuotes = !inQuotes
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if (content[i] === char && !inQuotes) {
|
||||
return i
|
||||
}
|
||||
|
||||
i++
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a string starts and ends with double quotes.
|
||||
*
|
||||
* @param content The string to check
|
||||
* @returns true if the string is quoted, false otherwise
|
||||
*/
|
||||
export function isQuotedString(content: string): boolean {
|
||||
const trimmed = content.trim()
|
||||
return trimmed.startsWith(DOUBLE_QUOTE) && trimmed.endsWith(DOUBLE_QUOTE) && trimmed.length >= 2
|
||||
}
|
||||
|
||||
/**
|
||||
* Skips whitespace characters starting from a given index.
|
||||
*
|
||||
* @param content The string to process
|
||||
* @param start The starting index
|
||||
* @returns The index of the first non-whitespace character, or content.length if all whitespace
|
||||
*/
|
||||
export function skipWhitespace(content: string, start: number): number {
|
||||
let i = start
|
||||
while (i < content.length && /\s/.test(content[i]!)) {
|
||||
i++
|
||||
}
|
||||
return i
|
||||
}
|
||||
@@ -7,7 +7,7 @@ import { COLON, LIST_ITEM_PREFIX } from '../constants'
|
||||
*
|
||||
* @param actual The actual count
|
||||
* @param expected The expected count
|
||||
* @param itemType The type of items being counted (e.g., 'list array items', 'tabular rows')
|
||||
* @param itemType The type of items being counted (e.g., `list array items`, `tabular rows`)
|
||||
* @param options Decode options
|
||||
* @throws RangeError if counts don't match in strict mode
|
||||
*/
|
||||
@@ -44,31 +44,6 @@ export function validateNoExtraListItems(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
|
||||
*
|
||||
* @param content The line content
|
||||
* @param delimiter The delimiter used in the table
|
||||
* @returns true if the line is a data row, false if it's a key-value pair
|
||||
*/
|
||||
export function isDataRow(content: string, delimiter: Delimiter): boolean {
|
||||
const colonPos = content.indexOf(COLON)
|
||||
const delimiterPos = content.indexOf(delimiter)
|
||||
|
||||
// No colon = definitely a data row
|
||||
if (colonPos === -1) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Has delimiter and it comes before colon = data row
|
||||
if (delimiterPos !== -1 && delimiterPos < colonPos) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Colon before delimiter or no delimiter = key-value pair
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that there are no extra tabular rows beyond the expected count.
|
||||
*
|
||||
@@ -95,3 +70,28 @@ export function validateNoExtraTabularRows(
|
||||
throw new RangeError(`Expected ${header.length} tabular rows, but found more`)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks if a line represents a data row (as opposed to a key-value pair) in a tabular array.
 *
 * @remarks
 * Only colons and delimiters that sit outside double-quoted sections are
 * considered. A row such as `"a:b",c` is therefore still a data row, and a
 * line such as `"a,b": 1` is still a key-value pair.
 *
 * @param content The line content
 * @param delimiter The delimiter used in the table
 * @returns true if the line is a data row, false if it's a key-value pair
 */
function isDataRow(content: string, delimiter: Delimiter): boolean {
  const colonPos = indexOfOutsideQuotes(content, COLON)

  // No structural colon = definitely a data row
  if (colonPos === -1) {
    return true
  }

  // A structural delimiter before the colon = data row;
  // colon first (or no delimiter at all) = key-value pair
  const delimiterPos = indexOfOutsideQuotes(content, delimiter)
  return delimiterPos !== -1 && delimiterPos < colonPos
}

/**
 * Returns the index of the first occurrence of `target` that is not inside a
 * double-quoted section, or -1 when there is none.
 */
function indexOfOutsideQuotes(content: string, target: string): number {
  let insideQuotes = false

  for (let pos = 0; pos < content.length; pos++) {
    const current = content[pos]

    if (insideQuotes && current === '\\') {
      // Skip the escaped character so an escaped quote does not end the section
      pos++
      continue
    }

    if (current === '"') {
      insideQuotes = !insideQuotes
      continue
    }

    if (!insideQuotes && content.startsWith(target, pos)) {
      return pos
    }
  }

  return -1
}
|
||||
|
||||
@@ -1,26 +1,7 @@
|
||||
import type {
|
||||
Depth,
|
||||
JsonArray,
|
||||
JsonObject,
|
||||
JsonPrimitive,
|
||||
JsonValue,
|
||||
ResolvedEncodeOptions,
|
||||
} from '../types'
|
||||
import type { Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ResolvedEncodeOptions } from '../types'
|
||||
import { LIST_ITEM_MARKER } from '../constants'
|
||||
import {
|
||||
isArrayOfArrays,
|
||||
isArrayOfObjects,
|
||||
isArrayOfPrimitives,
|
||||
isJsonArray,
|
||||
isJsonObject,
|
||||
isJsonPrimitive,
|
||||
} from './normalize'
|
||||
import {
|
||||
encodeAndJoinPrimitives,
|
||||
encodeKey,
|
||||
encodePrimitive,
|
||||
formatHeader,
|
||||
} from './primitives'
|
||||
import { isArrayOfArrays, isArrayOfObjects, isArrayOfPrimitives, isJsonArray, isJsonObject, isJsonPrimitive } from './normalize'
|
||||
import { encodeAndJoinPrimitives, encodeKey, encodePrimitive, formatHeader } from './primitives'
|
||||
import { LineWriter } from './writer'
|
||||
|
||||
// #region Encode normalized JsonValue
|
||||
|
||||
@@ -1,9 +1,4 @@
|
||||
import type {
|
||||
JsonArray,
|
||||
JsonObject,
|
||||
JsonPrimitive,
|
||||
JsonValue,
|
||||
} from '../types'
|
||||
import type { JsonArray, JsonObject, JsonPrimitive, JsonValue } from '../types'
|
||||
|
||||
// #region Normalization (unknown → JsonValue)
|
||||
|
||||
|
||||
@@ -1,14 +1,7 @@
|
||||
import type { JsonPrimitive } from '../types'
|
||||
import {
|
||||
BACKSLASH,
|
||||
COMMA,
|
||||
DEFAULT_DELIMITER,
|
||||
DOUBLE_QUOTE,
|
||||
FALSE_LITERAL,
|
||||
LIST_ITEM_MARKER,
|
||||
NULL_LITERAL,
|
||||
TRUE_LITERAL,
|
||||
} from '../constants'
|
||||
import { COMMA, DEFAULT_DELIMITER, DOUBLE_QUOTE, NULL_LITERAL } from '../constants'
|
||||
import { escapeString } from '../shared/string-utils'
|
||||
import { isSafeUnquoted, isValidUnquotedKey } from '../shared/validation'
|
||||
|
||||
// #region Primitive encoding
|
||||
|
||||
@@ -36,74 +29,6 @@ export function encodeStringLiteral(value: string, delimiter: string = COMMA): s
|
||||
return `${DOUBLE_QUOTE}${escapeString(value)}${DOUBLE_QUOTE}`
|
||||
}
|
||||
|
||||
export function escapeString(value: string): string {
|
||||
return value
|
||||
.replace(/\\/g, `${BACKSLASH}${BACKSLASH}`)
|
||||
.replace(/"/g, `${BACKSLASH}${DOUBLE_QUOTE}`)
|
||||
.replace(/\n/g, `${BACKSLASH}n`)
|
||||
.replace(/\r/g, `${BACKSLASH}r`)
|
||||
.replace(/\t/g, `${BACKSLASH}t`)
|
||||
}
|
||||
|
||||
export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
|
||||
if (!value) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (isPaddedWithWhitespace(value)) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (value === TRUE_LITERAL || value === FALSE_LITERAL || value === NULL_LITERAL) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (isNumericLike(value)) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check for colon (always structural)
|
||||
if (value.includes(':')) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check for quotes and backslash (always need escaping)
|
||||
if (value.includes('"') || value.includes('\\')) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check for brackets and braces (always structural)
|
||||
if (/[[\]{}]/.test(value)) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check for control characters (newline, carriage return, tab - always need quoting/escaping)
|
||||
if (/[\n\r\t]/.test(value)) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check for the active delimiter
|
||||
if (value.includes(delimiter)) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check for hyphen at start (list marker)
|
||||
if (value.startsWith(LIST_ITEM_MARKER)) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
function isNumericLike(value: string): boolean {
|
||||
// Match numbers like: 42, -3.14, 1e-6, 05, etc.
|
||||
return /^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i.test(value) || /^0\d+$/.test(value)
|
||||
}
|
||||
|
||||
function isPaddedWithWhitespace(value: string): boolean {
|
||||
return value !== value.trim()
|
||||
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Key encoding
|
||||
@@ -116,10 +41,6 @@ export function encodeKey(key: string): string {
|
||||
return `${DOUBLE_QUOTE}${escapeString(key)}${DOUBLE_QUOTE}`
|
||||
}
|
||||
|
||||
function isValidUnquotedKey(key: string): boolean {
|
||||
return /^[A-Z_][\w.]*$/i.test(key)
|
||||
}
|
||||
|
||||
// #endregion
|
||||
|
||||
// #region Value joining
|
||||
@@ -132,9 +53,6 @@ export function encodeAndJoinPrimitives(values: readonly JsonPrimitive[], delimi
|
||||
|
||||
// #region Header formatters
|
||||
|
||||
/**
|
||||
* Header formatter for arrays and tables with optional key prefix and field names
|
||||
*/
|
||||
export function formatHeader(
|
||||
length: number,
|
||||
options?: {
|
||||
|
||||
@@ -1,10 +1,4 @@
|
||||
import type {
|
||||
DecodeOptions,
|
||||
EncodeOptions,
|
||||
JsonValue,
|
||||
ResolvedDecodeOptions,
|
||||
ResolvedEncodeOptions,
|
||||
} from './types'
|
||||
import type { DecodeOptions, EncodeOptions, JsonValue, ResolvedDecodeOptions, ResolvedEncodeOptions } from './types'
|
||||
import { DEFAULT_DELIMITER } from './constants'
|
||||
import { decodeValueFromLines } from './decode/decoders'
|
||||
import { LineCursor, toParsedLines } from './decode/scanner'
|
||||
|
||||
28
src/shared/literal-utils.ts
Normal file
28
src/shared/literal-utils.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import { FALSE_LITERAL, NULL_LITERAL, TRUE_LITERAL } from '../constants'
|
||||
|
||||
/**
 * Tells whether `token` is exactly one of the keyword literals
 * (`true`, `false`, or `null`).
 *
 * @param token The token to test; it is compared as-is, without trimming
 * @returns true when the token equals one of the three literals
 */
export function isBooleanOrNullLiteral(token: string): boolean {
  switch (token) {
    case TRUE_LITERAL:
    case FALSE_LITERAL:
    case NULL_LITERAL:
      return true
    default:
      return false
  }
}
|
||||
|
||||
/**
 * Checks if a token represents a valid numeric literal.
 *
 * @remarks
 * A token is numeric when `Number(token)` yields a finite value and the token
 * has no forbidden leading zero. `"0"` itself and decimals like `"0.5"` are
 * allowed; `"05"` is rejected, and so are the signed forms `"-05"` and `"+007"`.
 * Empty and whitespace-only tokens are rejected as well, because `Number(' ')`
 * would otherwise evaluate to `0`.
 *
 * @param token The token to test
 * @returns true when the token should be read as a number
 */
export function isNumericLiteral(token: string): boolean {
  // `Number('')` and `Number('   ')` are both 0, so blank tokens are ruled out up front
  if (token.trim() === '') {
    return false
  }

  // Drop one leading sign so the leading-zero rule treats `-05` exactly like `05`
  const unsigned = token[0] === '-' || token[0] === '+' ? token.slice(1) : token

  // Must not have leading zeros (except for `"0"` itself or decimals like `"0.5"`)
  if (unsigned.length > 1 && unsigned[0] === '0' && unsigned[1] !== '.') {
    return false
  }

  // `Number.isFinite` is false for NaN as well as for positive/negative Infinity
  return Number.isFinite(Number(token))
}
|
||||
127
src/shared/string-utils.ts
Normal file
127
src/shared/string-utils.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
import { BACKSLASH, CARRIAGE_RETURN, DOUBLE_QUOTE, NEWLINE, TAB } from '../constants'
|
||||
|
||||
/**
 * Produces the escaped form of a string for encoding.
 *
 * @remarks
 * Backslashes and double quotes are prefixed with a backslash; newline,
 * carriage return and tab become `\n`, `\r` and `\t`. Every other character
 * is copied through unchanged.
 *
 * @param value The raw string
 * @returns The escaped string, without surrounding quotes
 */
export function escapeString(value: string): string {
  let escaped = ''

  // Single pass over the input; each special character maps to its two-character escape
  for (const char of value) {
    switch (char) {
      case '\\':
        escaped += `${BACKSLASH}${BACKSLASH}`
        break
      case '"':
        escaped += `${BACKSLASH}${DOUBLE_QUOTE}`
        break
      case '\n':
        escaped += `${BACKSLASH}n`
        break
      case '\r':
        escaped += `${BACKSLASH}r`
        break
      case '\t':
        escaped += `${BACKSLASH}t`
        break
      default:
        escaped += char
    }
  }

  return escaped
}
|
||||
|
||||
/**
 * Resolves the escape sequences in a string and returns the decoded text.
 *
 * @remarks
 * Recognised sequences are `\n`, `\t`, `\r`, `\\`, and `\"`.
 *
 * @param value The escaped string, without surrounding quotes
 * @returns The string with every escape sequence replaced by the character it denotes
 * @throws SyntaxError if a backslash ends the string or is followed by an unsupported character
 */
export function unescapeString(value: string): string {
  let decoded = ''

  for (let pos = 0; pos < value.length; pos++) {
    const current = value[pos]

    // Ordinary characters are copied verbatim
    if (current !== BACKSLASH) {
      decoded += current
      continue
    }

    if (pos + 1 >= value.length) {
      throw new SyntaxError('Invalid escape sequence: backslash at end of string')
    }

    const escapeCode = value[pos + 1]
    switch (escapeCode) {
      case 'n':
        decoded += NEWLINE
        break
      case 't':
        decoded += TAB
        break
      case 'r':
        decoded += CARRIAGE_RETURN
        break
      case BACKSLASH:
        decoded += BACKSLASH
        break
      case DOUBLE_QUOTE:
        decoded += DOUBLE_QUOTE
        break
      default:
        throw new SyntaxError(`Invalid escape sequence: \\${escapeCode}`)
    }

    // The escape code has been consumed along with the backslash
    pos++
  }

  return decoded
}
|
||||
|
||||
/**
 * Locates the double quote that closes a quoted section, honouring backslash escapes.
 *
 * @param content The string to search in
 * @param start The index of the opening quote; scanning begins right after it
 * @returns The index of the closing quote, or -1 if not found
 */
export function findClosingQuote(content: string, start: number): number {
  const lastIndex = content.length - 1

  for (let pos = start + 1; pos <= lastIndex; pos++) {
    const current = content[pos]

    if (current === BACKSLASH && pos < lastIndex) {
      // Step over the escaped character; the loop increment moves past it
      pos++
    }
    else if (current === DOUBLE_QUOTE) {
      return pos
    }
  }

  return -1 // Not found
}
|
||||
|
||||
/**
 * Searches for a character that lies outside of double-quoted sections.
 *
 * @remarks
 * Inside a quoted section a backslash hides the character that follows it, so
 * an escaped quote does not end the section.
 *
 * @param content The string to search in
 * @param char The character to look for
 * @param start Optional starting index (defaults to 0)
 * @returns The index of the character, or -1 if not found outside quotes
 */
export function findUnquotedChar(content: string, char: string, start = 0): number {
  let insideQuotes = false

  for (let pos = start; pos < content.length; pos++) {
    const current = content[pos]

    if (insideQuotes && current === BACKSLASH && pos + 1 < content.length) {
      // Step over the escaped character; the loop increment moves past it
      pos++
      continue
    }

    if (current === DOUBLE_QUOTE) {
      insideQuotes = !insideQuotes
      continue
    }

    if (!insideQuotes && current === char) {
      return pos
    }
  }

  return -1
}
|
||||
84
src/shared/validation.ts
Normal file
84
src/shared/validation.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import { COMMA, LIST_ITEM_MARKER } from '../constants'
|
||||
import { isBooleanOrNullLiteral } from './literal-utils'
|
||||
|
||||
/**
 * Decides whether a key may be written without surrounding quotes.
 *
 * @remarks
 * An unquoted key starts with a letter or an underscore and continues with
 * letters, digits, underscores, or dots. The empty string does not qualify.
 *
 * @param key The key to test
 * @returns true when the key matches the unquoted-key pattern
 */
export function isValidUnquotedKey(key: string): boolean {
  const unquotedKeyPattern = /^[A-Z_][\w.]*$/i
  return unquotedKeyPattern.test(key)
}
|
||||
|
||||
/**
 * Determines if a string value can be safely encoded without quotes.
 *
 * @remarks
 * Quoting is required when the string:
 * - is empty
 * - has leading or trailing whitespace
 * - could be read as a literal (boolean, null, number)
 * - contains a colon, a bracket, or a brace
 * - contains a double quote or a backslash
 * - contains a newline, a carriage return, or a tab
 * - contains the active delimiter
 * - starts with the list item marker
 *
 * @param value The string to test
 * @param delimiter The active delimiter (defaults to `COMMA`)
 * @returns true when none of the conditions above applies
 */
export function isSafeUnquoted(value: string, delimiter: string = COMMA): boolean {
  // Empty or whitespace-padded strings are never safe
  if (value === '' || value.trim() !== value) {
    return false
  }

  // Would be read back as a boolean, null, or number
  const looksLikeLiteral = isBooleanOrNullLiteral(value) || isNumericLike(value)

  return !(
    looksLikeLiteral
    // Colon (always structural)
    || value.includes(':')
    // Quotes and backslash (always need escaping)
    || value.includes('"')
    || value.includes('\\')
    // Brackets and braces (always structural)
    || /[[\]{}]/.test(value)
    // Control characters (newline, carriage return, tab)
    || /[\n\r\t]/.test(value)
    // The active delimiter
    || value.includes(delimiter)
    // Hyphen at start (list marker)
    || value.startsWith(LIST_ITEM_MARKER)
  )
}
|
||||
|
||||
/**
 * Checks if a string looks like a number and therefore must be quoted to stay a string.
 *
 * @remarks
 * Matches numbers like `42`, `-3.14`, `1e-6`, `05`, etc. It also matches every
 * other spelling that `Number()` converts to a finite value (`.5`, `5.`, `+5`,
 * `0x1F`, …). `isNumericLiteral` classifies tokens with `Number()`, so leaving
 * such a string unquoted could let it be read back as a number. Over-matching
 * is harmless: the only effect is that the string gets quoted.
 *
 * @param value The string to test
 * @returns true when the string could be mistaken for a number
 */
function isNumericLike(value: string): boolean {
  if (/^-?\d+(?:\.\d+)?(?:e[+-]?\d+)?$/i.test(value) || /^0\d+$/.test(value)) {
    return true
  }

  // `Number('')` and `Number('  ')` are 0, so blank input is excluded explicitly
  return value.trim() !== '' && Number.isFinite(Number(value))
}
|
||||
Reference in New Issue
Block a user