fix: handle empty list items and nested objects in list items (#65)

* fix: support quoted keys with array syntax

Fixes parsing of quoted keys followed by array syntax like:
"x-codeSamples"[1]{lang,label,source}:

Previously, parseArrayHeaderLine would skip any line starting with
a quoted key. This caused large OpenAPI specs (like the Hetzner Cloud
API) to fail to decode.

Changes:
- Modified parseArrayHeaderLine to handle quoted keys
- Added logic to find bracket start after closing quote
- Unescape quoted keys properly
- Added 3 test cases for the new functionality

Closes #62

* fix: handle empty list items and nested objects in list items

This commit fixes two critical decoder bugs that prevented complex
OpenAPI specs (like DigitalOcean's 638 schemas) from being decoded:

1. Empty list items: Items encoded as just `-` (without a trailing
   space) were not recognized because the decoder only checked for
   `LIST_ITEM_PREFIX = '- '`. Fixed by adding a check for both the
   `- ` and `-` patterns.

2. Nested objects in list items: When a list item contains an object
   with nested properties (e.g., `allOf[2]: - properties: state: ...`),
   the decoder was looking for nested content at the wrong depth level.
   List items add one level of indentation, so nested content should be
   at baseDepth + 2, not baseDepth + 1.

   Fixed by creating `decodeKeyValueForListItem()`, which correctly handles
   the extra nesting while maintaining the proper `followDepth` for siblings.

Changes:
- Added `decodeKeyValueForListItem()` function to handle list item nesting
- Updated `decodeObjectFromListItem()` to use new function
- Added empty item detection in `decodeListArray()`
- Added comprehensive unit tests for both bugs
- Added integration test with real DigitalOcean OpenAPI spec (638 schemas)
- Gitignored large fixture files, added README with download instructions

Tests:
- 5 new unit tests in list-item-bugs.test.ts
- 1 integration test in digitalocean-decode.test.ts (skips if fixture missing)
- All 309 existing tests still pass

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* perf: calculate depth on demand

* chore: move tests to test suite

* chore: test against new tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Johann Schopplich <mail@johannschopplich.com>
This commit is contained in:
Wind
2025-11-03 10:18:14 +03:00
committed by GitHub
parent cfc3dadb21
commit e414ca3671
5 changed files with 76 additions and 21 deletions

View File

@@ -38,6 +38,6 @@
"test": "vitest"
},
"devDependencies": {
"@toon-format/spec": "^1.3.0"
"@toon-format/spec": "^1.3.3"
}
}

View File

@@ -1,4 +1,4 @@
import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
import type { ArrayHeaderInfo, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner'
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
import { findClosingQuote } from '../shared/string-utils'
@@ -40,8 +40,8 @@ function isKeyValueLine(line: ParsedLine): boolean {
if (closingQuoteIndex === -1) {
return false
}
// Check if there's a colon after the quoted key
return closingQuoteIndex + 1 < content.length && content[closingQuoteIndex + 1] === COLON
// Check if colon exists after quoted key (may have array/brace syntax between)
return content.slice(closingQuoteIndex + 1).includes(COLON)
}
else {
// Unquoted key - look for first colon not inside quotes
@@ -56,17 +56,25 @@ function isKeyValueLine(line: ParsedLine): boolean {
function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
const obj: JsonObject = {}
// Detect the actual depth of the first field (may differ from baseDepth in nested structures)
let computedDepth: Depth | undefined
while (!cursor.atEnd()) {
const line = cursor.peek()
if (!line || line.depth < baseDepth) {
break
}
if (line.depth === baseDepth) {
const [key, value] = decodeKeyValuePair(line, cursor, baseDepth, options)
if (computedDepth === undefined && line.depth >= baseDepth) {
computedDepth = line.depth
}
if (line.depth === computedDepth) {
const [key, value] = decodeKeyValuePair(line, cursor, computedDepth, options)
obj[key] = value
}
else {
// Different depth (shallower or deeper) - stop object parsing
break
}
}
@@ -189,14 +197,17 @@ function decodeListArray(
break
}
if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) {
// Check for list item (with or without space after hyphen)
const isListItem = line.content.startsWith(LIST_ITEM_PREFIX) || line.content === '-'
if (line.depth === itemDepth && isListItem) {
// Track first and last item line numbers
if (startLine === undefined) {
startLine = line.lineNumber
}
endLine = line.lineNumber
const item = decodeListItem(cursor, itemDepth, header.delimiter, options)
const item = decodeListItem(cursor, itemDepth, options)
items.push(item)
// Update endLine to the current cursor position (after item was decoded)
@@ -303,7 +314,6 @@ function decodeTabularArray(
function decodeListItem(
cursor: LineCursor,
baseDepth: Depth,
activeDelimiter: Delimiter,
options: ResolvedDecodeOptions,
): JsonValue {
const line = cursor.next()
@@ -311,7 +321,24 @@ function decodeListItem(
throw new ReferenceError('Expected list item')
}
const afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
// Check for list item (with or without space after hyphen)
let afterHyphen: string
// Empty list item should be an empty object
if (line.content === '-') {
return {}
}
else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
}
else {
throw new SyntaxError(`Expected list item to start with "${LIST_ITEM_PREFIX}"`)
}
// Empty content after list item should also be an empty object
if (!afterHyphen.trim()) {
return {}
}
// Check for array header after hyphen
if (isArrayHeaderAfterHyphen(afterHyphen)) {

View File

@@ -9,13 +9,33 @@ export function parseArrayHeaderLine(
content: string,
defaultDelimiter: Delimiter,
): { header: ArrayHeaderInfo, inlineValues?: string } | undefined {
// Don't match if the line starts with a quote (it's a quoted key, not an array)
if (content.trimStart().startsWith(DOUBLE_QUOTE)) {
return
const trimmed = content.trimStart()
// Find the bracket segment, accounting for quoted keys that may contain brackets
let bracketStart = -1
// For quoted keys, find bracket after closing quote (not inside the quoted string)
if (trimmed.startsWith(DOUBLE_QUOTE)) {
const closingQuoteIndex = findClosingQuote(trimmed, 0)
if (closingQuoteIndex === -1) {
return
}
const afterQuote = trimmed.slice(closingQuoteIndex + 1)
if (!afterQuote.startsWith(OPEN_BRACKET)) {
return
}
// Calculate position in original content and find bracket after the quoted key
const leadingWhitespace = content.length - trimmed.length
const keyEndIndex = leadingWhitespace + closingQuoteIndex + 1
bracketStart = content.indexOf(OPEN_BRACKET, keyEndIndex)
}
else {
// Unquoted key - find first bracket
bracketStart = content.indexOf(OPEN_BRACKET)
}
// Find the bracket segment first
const bracketStart = content.indexOf(OPEN_BRACKET)
if (bracketStart === -1) {
return
}
@@ -44,7 +64,13 @@ export function parseArrayHeaderLine(
return
}
const key = bracketStart > 0 ? content.slice(0, bracketStart) : undefined
// Extract and parse the key (might be quoted)
let key: string | undefined
if (bracketStart > 0) {
const rawKey = content.slice(0, bracketStart).trim()
key = rawKey.startsWith(DOUBLE_QUOTE) ? parseStringLiteral(rawKey) : rawKey
}
const afterColon = content.slice(colonIndex + 1).trim()
const bracketContent = content.slice(bracketStart + 1, bracketEnd)