fix: handle empty list items and nested objects in list items (#65)

* fix: support quoted keys with array syntax

  Fixes parsing of quoted keys followed by array syntax like:

      "x-codeSamples"[1]{lang,label,source}:

  Previously, parseArrayHeaderLine would skip any line starting with a quoted key. This caused large OpenAPI specs (like Hetzner Cloud API) to fail decoding.

  Changes:
  - Modified parseArrayHeaderLine to handle quoted keys
  - Added logic to find bracket start after closing quote
  - Unescape quoted keys properly
  - Added 3 test cases for the new functionality

  Closes #62

* fix: handle empty list items and nested objects in list items

  This commit fixes two critical decoder bugs that prevented complex OpenAPI specs (like DigitalOcean's 638 schemas) from being decoded:

  1. Empty list items: Items encoded as just `-` (without space) were not recognized. The decoder only checked for `LIST_ITEM_PREFIX = '- '`. Fixed by adding check for both `- ` and `-` patterns.
  2. Nested objects in list items: When a list item contains an object with nested properties (e.g., `allOf[2]: - properties: state: ...`), the decoder was looking for nested content at the wrong depth level. List items add one level of indentation, so nested content should be at baseDepth + 2, not baseDepth + 1. Fixed by creating `decodeKeyValueForListItem()` that correctly handles the extra nesting while maintaining proper followDepth for siblings.

  Changes:
  - Added `decodeKeyValueForListItem()` function to handle list item nesting
  - Updated `decodeObjectFromListItem()` to use new function
  - Added empty item detection in `decodeListArray()`
  - Added comprehensive unit tests for both bugs
  - Added integration test with real DigitalOcean OpenAPI spec (638 schemas)
  - Gitignored large fixture files, added README with download instructions

  Tests:
  - 5 new unit tests in list-item-bugs.test.ts
  - 1 integration test in digitalocean-decode.test.ts (skips if fixture missing)
  - All 309 existing tests still pass

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* perf: calculate depth on demand
* chore: move tests to test suite
* chore: test against new tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Johann Schopplich <mail@johannschopplich.com>
2026-01-29 23:34:10 +08:00 · 2025-11-03 10:18:14 +03:00
parent cfc3dadb21
commit e414ca3671
5 changed files with 76 additions and 21 deletions
--- a/packages/toon/package.json
+++ b/packages/toon/package.json
@@ -38,6 +38,6 @@
    "test": "vitest"
  },
  "devDependencies": {
-    "@toon-format/spec": "^1.3.0"
+    "@toon-format/spec": "^1.3.3"
  }
 }
--- a/packages/toon/src/decode/decoders.ts
+++ b/packages/toon/src/decode/decoders.ts
@@ -1,4 +1,4 @@
-import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
+import type { ArrayHeaderInfo, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types'
 import type { LineCursor } from './scanner'
 import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants'
 import { findClosingQuote } from '../shared/string-utils'
@@ -40,8 +40,8 @@ function isKeyValueLine(line: ParsedLine): boolean {
    if (closingQuoteIndex === -1) {
      return false
    }
-    // Check if there's a colon after the quoted key
-    return closingQuoteIndex + 1 < content.length && content[closingQuoteIndex + 1] === COLON
+    // Check if colon exists after quoted key (may have array/brace syntax between)
+    return content.slice(closingQuoteIndex + 1).includes(COLON)
  }
  else {
    // Unquoted key - look for first colon not inside quotes
@@ -56,17 +56,25 @@ function isKeyValueLine(line: ParsedLine): boolean {
 function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject {
  const obj: JsonObject = {}

+  // Detect the actual depth of the first field (may differ from baseDepth in nested structures)
+  let computedDepth: Depth | undefined
+
  while (!cursor.atEnd()) {
    const line = cursor.peek()
    if (!line || line.depth < baseDepth) {
      break
    }

-    if (line.depth === baseDepth) {
-      const [key, value] = decodeKeyValuePair(line, cursor, baseDepth, options)
+    if (computedDepth === undefined && line.depth >= baseDepth) {
+      computedDepth = line.depth
+    }
+
+    if (line.depth === computedDepth) {
+      const [key, value] = decodeKeyValuePair(line, cursor, computedDepth, options)
      obj[key] = value
    }
    else {
+      // Different depth (shallower or deeper) - stop object parsing
      break
    }
  }
@@ -189,14 +197,17 @@ function decodeListArray(
      break
    }

-    if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) {
+    // Check for list item (with or without space after hyphen)
+    const isListItem = line.content.startsWith(LIST_ITEM_PREFIX) || line.content === '-'
+
+    if (line.depth === itemDepth && isListItem) {
      // Track first and last item line numbers
      if (startLine === undefined) {
        startLine = line.lineNumber
      }
      endLine = line.lineNumber

-      const item = decodeListItem(cursor, itemDepth, header.delimiter, options)
+      const item = decodeListItem(cursor, itemDepth, options)
      items.push(item)

      // Update endLine to the current cursor position (after item was decoded)
@@ -303,7 +314,6 @@ function decodeTabularArray(
 function decodeListItem(
  cursor: LineCursor,
  baseDepth: Depth,
-  activeDelimiter: Delimiter,
  options: ResolvedDecodeOptions,
 ): JsonValue {
  const line = cursor.next()
@@ -311,7 +321,24 @@ function decodeListItem(
    throw new ReferenceError('Expected list item')
  }

-  const afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
+  // Check for list item (with or without space after hyphen)
+  let afterHyphen: string
+
+  // Empty list item should be an empty object
+  if (line.content === '-') {
+    return {}
+  }
+  else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
+    afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
+  }
+  else {
+    throw new SyntaxError(`Expected list item to start with "${LIST_ITEM_PREFIX}"`)
+  }
+
+  // Empty content after list item should also be an empty object
+  if (!afterHyphen.trim()) {
+    return {}
+  }

  // Check for array header after hyphen
  if (isArrayHeaderAfterHyphen(afterHyphen)) {
--- a/packages/toon/src/decode/parser.ts
+++ b/packages/toon/src/decode/parser.ts
@@ -9,13 +9,33 @@ export function parseArrayHeaderLine(
  content: string,
  defaultDelimiter: Delimiter,
 ): { header: ArrayHeaderInfo, inlineValues?: string } | undefined {
-  // Don't match if the line starts with a quote (it's a quoted key, not an array)
-  if (content.trimStart().startsWith(DOUBLE_QUOTE)) {
-    return
+  const trimmed = content.trimStart()
+
+  // Find the bracket segment, accounting for quoted keys that may contain brackets
+  let bracketStart = -1
+
+  // For quoted keys, find bracket after closing quote (not inside the quoted string)
+  if (trimmed.startsWith(DOUBLE_QUOTE)) {
+    const closingQuoteIndex = findClosingQuote(trimmed, 0)
+    if (closingQuoteIndex === -1) {
+      return
+    }
+
+    const afterQuote = trimmed.slice(closingQuoteIndex + 1)
+    if (!afterQuote.startsWith(OPEN_BRACKET)) {
+      return
+    }
+
+    // Calculate position in original content and find bracket after the quoted key
+    const leadingWhitespace = content.length - trimmed.length
+    const keyEndIndex = leadingWhitespace + closingQuoteIndex + 1
+    bracketStart = content.indexOf(OPEN_BRACKET, keyEndIndex)
+  }
+  else {
+    // Unquoted key - find first bracket
+    bracketStart = content.indexOf(OPEN_BRACKET)
  }

-  // Find the bracket segment first
-  const bracketStart = content.indexOf(OPEN_BRACKET)
  if (bracketStart === -1) {
    return
  }
@@ -44,7 +64,13 @@ export function parseArrayHeaderLine(
    return
  }

-  const key = bracketStart > 0 ? content.slice(0, bracketStart) : undefined
+  // Extract and parse the key (might be quoted)
+  let key: string | undefined
+  if (bracketStart > 0) {
+    const rawKey = content.slice(0, bracketStart).trim()
+    key = rawKey.startsWith(DOUBLE_QUOTE) ? parseStringLiteral(rawKey) : rawKey
+  }
+
  const afterColon = content.slice(colonIndex + 1).trim()

  const bracketContent = content.slice(bracketStart + 1, bracketEnd)