fix: handle empty list items and nested objects in list items (#65)

* fix: support quoted keys with array syntax

Fixes parsing of quoted keys followed by array syntax like:
"x-codeSamples"[1]{lang,label,source}:

Previously, `parseArrayHeaderLine` would skip any line starting with
a quoted key. This caused large OpenAPI specs (like the Hetzner Cloud API)
to fail decoding.

Changes:
- Modified parseArrayHeaderLine to handle quoted keys
- Added logic to find bracket start after closing quote
- Unescape quoted keys properly
- Added 3 test cases for the new functionality

Closes #62

* fix: handle empty list items and nested objects in list items

This commit fixes two critical decoder bugs that prevented complex
OpenAPI specs (like DigitalOcean's 638 schemas) from being decoded:

1. Empty list items: Items encoded as just `-` (without a trailing space)
   were not recognized. The decoder only checked for `LIST_ITEM_PREFIX = '- '`.
   Fixed by adding a check for both the `- ` and `-` patterns; an empty
   item is decoded as an empty object.

2. Nested objects in list items: When a list item contains an object
   with nested properties (e.g., `allOf[2]: - properties: state: ...`),
   the decoder was looking for nested content at the wrong depth level.
   List items add one level of indentation, so nested content should be
   at baseDepth + 2, not baseDepth + 1.

   Fixed by creating `decodeKeyValueForListItem()` that correctly handles
   the extra nesting while maintaining proper followDepth for siblings.

Changes:
- Added `decodeKeyValueForListItem()` function to handle list item nesting
- Updated `decodeObjectFromListItem()` to use new function
- Added empty item detection in `decodeListArray()`
- Added comprehensive unit tests for both bugs
- Added integration test with real DigitalOcean OpenAPI spec (638 schemas)
- Gitignored large fixture files, added README with download instructions

Tests:
- 5 new unit tests in list-item-bugs.test.ts
- 1 integration test in digitalocean-decode.test.ts (skips if fixture missing)
- All 309 existing tests still pass

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* perf: calculate depth on demand

* chore: move tests to test suite

* chore: test against new tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Johann Schopplich <mail@johannschopplich.com>
This commit is contained in:
Wind
2025-11-03 10:18:14 +03:00
committed by GitHub
parent cfc3dadb21
commit e414ca3671
5 changed files with 76 additions and 21 deletions

2
.gitignore vendored
View File

@@ -2,3 +2,5 @@ dist
node_modules node_modules
.DS_Store .DS_Store
.env .env
packages/toon/test/fixtures/*.json
packages/toon/test/fixtures/*.toon

View File

@@ -38,6 +38,6 @@
"test": "vitest" "test": "vitest"
}, },
"devDependencies": { "devDependencies": {
"@toon-format/spec": "^1.3.0" "@toon-format/spec": "^1.3.3"
} }
} }

View File

@@ -1,4 +1,4 @@
import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types' import type { ArrayHeaderInfo, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
import type { LineCursor } from './scanner' import type { LineCursor } from './scanner'
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants' import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
import { findClosingQuote } from '../shared/string-utils' import { findClosingQuote } from '../shared/string-utils'
@@ -40,8 +40,8 @@ function isKeyValueLine(line: ParsedLine): boolean {
if (closingQuoteIndex === -1) { if (closingQuoteIndex === -1) {
return false return false
} }
// Check if there's a colon after the quoted key // Check if colon exists after quoted key (may have array/brace syntax between)
return closingQuoteIndex + 1 < content.length && content[closingQuoteIndex + 1] === COLON return content.slice(closingQuoteIndex + 1).includes(COLON)
} }
else { else {
// Unquoted key - look for first colon not inside quotes // Unquoted key - look for first colon not inside quotes
@@ -56,17 +56,25 @@ function isKeyValueLine(line: ParsedLine): boolean {
function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject { function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
const obj: JsonObject = {} const obj: JsonObject = {}
// Detect the actual depth of the first field (may differ from baseDepth in nested structures)
let computedDepth: Depth | undefined
while (!cursor.atEnd()) { while (!cursor.atEnd()) {
const line = cursor.peek() const line = cursor.peek()
if (!line || line.depth < baseDepth) { if (!line || line.depth < baseDepth) {
break break
} }
if (line.depth === baseDepth) { if (computedDepth === undefined && line.depth >= baseDepth) {
const [key, value] = decodeKeyValuePair(line, cursor, baseDepth, options) computedDepth = line.depth
}
if (line.depth === computedDepth) {
const [key, value] = decodeKeyValuePair(line, cursor, computedDepth, options)
obj[key] = value obj[key] = value
} }
else { else {
// Different depth (shallower or deeper) - stop object parsing
break break
} }
} }
@@ -189,14 +197,17 @@ function decodeListArray(
break break
} }
if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) { // Check for list item (with or without space after hyphen)
const isListItem = line.content.startsWith(LIST_ITEM_PREFIX) || line.content === '-'
if (line.depth === itemDepth && isListItem) {
// Track first and last item line numbers // Track first and last item line numbers
if (startLine === undefined) { if (startLine === undefined) {
startLine = line.lineNumber startLine = line.lineNumber
} }
endLine = line.lineNumber endLine = line.lineNumber
const item = decodeListItem(cursor, itemDepth, header.delimiter, options) const item = decodeListItem(cursor, itemDepth, options)
items.push(item) items.push(item)
// Update endLine to the current cursor position (after item was decoded) // Update endLine to the current cursor position (after item was decoded)
@@ -303,7 +314,6 @@ function decodeTabularArray(
function decodeListItem( function decodeListItem(
cursor: LineCursor, cursor: LineCursor,
baseDepth: Depth, baseDepth: Depth,
activeDelimiter: Delimiter,
options: ResolvedDecodeOptions, options: ResolvedDecodeOptions,
): JsonValue { ): JsonValue {
const line = cursor.next() const line = cursor.next()
@@ -311,7 +321,24 @@ function decodeListItem(
throw new ReferenceError('Expected list item') throw new ReferenceError('Expected list item')
} }
const afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length) // Check for list item (with or without space after hyphen)
let afterHyphen: string
// Empty list item should be an empty object
if (line.content === '-') {
return {}
}
else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
}
else {
throw new SyntaxError(`Expected list item to start with "${LIST_ITEM_PREFIX}"`)
}
// Empty content after list item should also be an empty object
if (!afterHyphen.trim()) {
return {}
}
// Check for array header after hyphen // Check for array header after hyphen
if (isArrayHeaderAfterHyphen(afterHyphen)) { if (isArrayHeaderAfterHyphen(afterHyphen)) {

View File

@@ -9,13 +9,33 @@ export function parseArrayHeaderLine(
content: string, content: string,
defaultDelimiter: Delimiter, defaultDelimiter: Delimiter,
): { header: ArrayHeaderInfo, inlineValues?: string } | undefined { ): { header: ArrayHeaderInfo, inlineValues?: string } | undefined {
// Don't match if the line starts with a quote (it's a quoted key, not an array) const trimmed = content.trimStart()
if (content.trimStart().startsWith(DOUBLE_QUOTE)) {
// Find the bracket segment, accounting for quoted keys that may contain brackets
let bracketStart = -1
// For quoted keys, find bracket after closing quote (not inside the quoted string)
if (trimmed.startsWith(DOUBLE_QUOTE)) {
const closingQuoteIndex = findClosingQuote(trimmed, 0)
if (closingQuoteIndex === -1) {
return return
} }
// Find the bracket segment first const afterQuote = trimmed.slice(closingQuoteIndex + 1)
const bracketStart = content.indexOf(OPEN_BRACKET) if (!afterQuote.startsWith(OPEN_BRACKET)) {
return
}
// Calculate position in original content and find bracket after the quoted key
const leadingWhitespace = content.length - trimmed.length
const keyEndIndex = leadingWhitespace + closingQuoteIndex + 1
bracketStart = content.indexOf(OPEN_BRACKET, keyEndIndex)
}
else {
// Unquoted key - find first bracket
bracketStart = content.indexOf(OPEN_BRACKET)
}
if (bracketStart === -1) { if (bracketStart === -1) {
return return
} }
@@ -44,7 +64,13 @@ export function parseArrayHeaderLine(
return return
} }
const key = bracketStart > 0 ? content.slice(0, bracketStart) : undefined // Extract and parse the key (might be quoted)
let key: string | undefined
if (bracketStart > 0) {
const rawKey = content.slice(0, bracketStart).trim()
key = rawKey.startsWith(DOUBLE_QUOTE) ? parseStringLiteral(rawKey) : rawKey
}
const afterColon = content.slice(colonIndex + 1).trim() const afterColon = content.slice(colonIndex + 1).trim()
const bracketContent = content.slice(bracketStart + 1, bracketEnd) const bracketContent = content.slice(bracketStart + 1, bracketEnd)

10
pnpm-lock.yaml generated
View File

@@ -102,8 +102,8 @@ importers:
packages/toon: packages/toon:
devDependencies: devDependencies:
'@toon-format/spec': '@toon-format/spec':
specifier: ^1.3.0 specifier: ^1.3.3
version: 1.3.0 version: 1.3.3
packages: packages:
@@ -833,8 +833,8 @@ packages:
peerDependencies: peerDependencies:
eslint: '>=9.0.0' eslint: '>=9.0.0'
'@toon-format/spec@1.3.0': '@toon-format/spec@1.3.3':
resolution: {integrity: sha512-uZrR+aML7i6K1Lt8pedx24rxLKZbg2NjcDO/jVuBqjfQ/Is1kKHpyMMwCg88zQuXGQIgjogh7MbsNkmD5WNlxQ==} resolution: {integrity: sha512-AgOQGwv6EJUGj1zWjaSXMfFn3imsvwC3NHdaXLFmI6zF3dJ3LtrBzU4sg5meVJMO6bxgl4Wrl3/U/b53aDRSPA==}
'@tybys/wasm-util@0.10.1': '@tybys/wasm-util@0.10.1':
resolution: {integrity: sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==} resolution: {integrity: sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==}
@@ -3042,7 +3042,7 @@ snapshots:
estraverse: 5.3.0 estraverse: 5.3.0
picomatch: 4.0.3 picomatch: 4.0.3
'@toon-format/spec@1.3.0': {} '@toon-format/spec@1.3.3': {}
'@tybys/wasm-util@0.10.1': '@tybys/wasm-util@0.10.1':
dependencies: dependencies: