From e414ca3671f756a1b1185c221567f297743f5fa9 Mon Sep 17 00:00:00 2001 From: Wind Date: Mon, 3 Nov 2025 10:18:14 +0300 Subject: [PATCH] fix: handle empty list items and nested objects in list items (#65) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: support quoted keys with array syntax Fixes parsing of quoted keys followed by array syntax like: "x-codeSamples"[1]{lang,label,source}: Previously, parseArrayHeaderLine would skip any line starting with a quoted key. This caused large OpenAPI specs (like the Hetzner Cloud API) to fail decoding. Changes: - Modified parseArrayHeaderLine to handle quoted keys - Added logic to find bracket start after closing quote - Unescaped quoted keys properly - Added 3 test cases for the new functionality Closes #62 * fix: handle empty list items and nested objects in list items This commit fixes two critical decoder bugs that prevented complex OpenAPI specs (like DigitalOcean's 638 schemas) from being decoded: 1. Empty list items: Items encoded as just `-` (without a trailing space) were not recognized. The decoder only checked for `LIST_ITEM_PREFIX = '- '`. Fixed by adding a check for both `- ` and `-` patterns. 2. Nested objects in list items: When a list item contains an object with nested properties (e.g., `allOf[2]: - properties: state: ...`), the decoder was looking for nested content at the wrong depth level. List items add one level of indentation, so nested content should be at baseDepth + 2, not baseDepth + 1. Fixed by creating `decodeKeyValueForListItem()`, which correctly handles the extra nesting while maintaining proper followDepth for siblings. 
Changes: - Added `decodeKeyValueForListItem()` function to handle list item nesting - Updated `decodeObjectFromListItem()` to use new function - Added empty item detection in `decodeListArray()` - Added comprehensive unit tests for both bugs - Added integration test with real DigitalOcean OpenAPI spec (638 schemas) - Gitignored large fixture files, added README with download instructions Tests: - 5 new unit tests in list-item-bugs.test.ts - 1 integration test in digitalocean-decode.test.ts (skips if fixture missing) - All 309 existing tests still pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * perf: calculate depth on demand * chore: move tests to test suite * chore: test against new tests --------- Co-authored-by: Claude Co-authored-by: Johann Schopplich --- .gitignore | 2 ++ packages/toon/package.json | 2 +- packages/toon/src/decode/decoders.ts | 45 ++++++++++++++++++++++------ packages/toon/src/decode/parser.ts | 38 +++++++++++++++++++---- pnpm-lock.yaml | 10 +++---- 5 files changed, 76 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index f73f2b4..bcf52d9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ dist node_modules .DS_Store .env +packages/toon/test/fixtures/*.json +packages/toon/test/fixtures/*.toon diff --git a/packages/toon/package.json b/packages/toon/package.json index b039236..9e81c23 100644 --- a/packages/toon/package.json +++ b/packages/toon/package.json @@ -38,6 +38,6 @@ "test": "vitest" }, "devDependencies": { - "@toon-format/spec": "^1.3.0" + "@toon-format/spec": "^1.3.3" } } diff --git a/packages/toon/src/decode/decoders.ts b/packages/toon/src/decode/decoders.ts index 609a9f7..bf38162 100644 --- a/packages/toon/src/decode/decoders.ts +++ b/packages/toon/src/decode/decoders.ts @@ -1,4 +1,4 @@ -import type { ArrayHeaderInfo, Delimiter, Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types' +import type { ArrayHeaderInfo, 
Depth, JsonArray, JsonObject, JsonPrimitive, JsonValue, ParsedLine, ResolvedDecodeOptions } from '../types' import type { LineCursor } from './scanner' import { COLON, DEFAULT_DELIMITER, LIST_ITEM_PREFIX } from '../constants' import { findClosingQuote } from '../shared/string-utils' @@ -40,8 +40,8 @@ function isKeyValueLine(line: ParsedLine): boolean { if (closingQuoteIndex === -1) { return false } - // Check if there's a colon after the quoted key - return closingQuoteIndex + 1 < content.length && content[closingQuoteIndex + 1] === COLON + // Check if colon exists after quoted key (may have array/brace syntax between) + return content.slice(closingQuoteIndex + 1).includes(COLON) } else { // Unquoted key - look for first colon not inside quotes @@ -56,17 +56,25 @@ function isKeyValueLine(line: ParsedLine): boolean { function decodeObject(cursor: LineCursor, baseDepth: Depth, options: ResolvedDecodeOptions): JsonObject { const obj: JsonObject = {} + // Detect the actual depth of the first field (may differ from baseDepth in nested structures) + let computedDepth: Depth | undefined + while (!cursor.atEnd()) { const line = cursor.peek() if (!line || line.depth < baseDepth) { break } - if (line.depth === baseDepth) { - const [key, value] = decodeKeyValuePair(line, cursor, baseDepth, options) + if (computedDepth === undefined && line.depth >= baseDepth) { + computedDepth = line.depth + } + + if (line.depth === computedDepth) { + const [key, value] = decodeKeyValuePair(line, cursor, computedDepth, options) obj[key] = value } else { + // Different depth (shallower or deeper) - stop object parsing break } } @@ -189,14 +197,17 @@ function decodeListArray( break } - if (line.depth === itemDepth && line.content.startsWith(LIST_ITEM_PREFIX)) { + // Check for list item (with or without space after hyphen) + const isListItem = line.content.startsWith(LIST_ITEM_PREFIX) || line.content === '-' + + if (line.depth === itemDepth && isListItem) { // Track first and last item line 
numbers if (startLine === undefined) { startLine = line.lineNumber } endLine = line.lineNumber - const item = decodeListItem(cursor, itemDepth, header.delimiter, options) + const item = decodeListItem(cursor, itemDepth, options) items.push(item) // Update endLine to the current cursor position (after item was decoded) @@ -303,7 +314,6 @@ function decodeTabularArray( function decodeListItem( cursor: LineCursor, baseDepth: Depth, - activeDelimiter: Delimiter, options: ResolvedDecodeOptions, ): JsonValue { const line = cursor.next() @@ -311,7 +321,24 @@ function decodeListItem( throw new ReferenceError('Expected list item') } - const afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length) + // Check for list item (with or without space after hyphen) + let afterHyphen: string + + // Empty list item should be an empty object + if (line.content === '-') { + return {} + } + else if (line.content.startsWith(LIST_ITEM_PREFIX)) { + afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length) + } + else { + throw new SyntaxError(`Expected list item to start with "${LIST_ITEM_PREFIX}"`) + } + + // Empty content after list item should also be an empty object + if (!afterHyphen.trim()) { + return {} + } // Check for array header after hyphen if (isArrayHeaderAfterHyphen(afterHyphen)) { diff --git a/packages/toon/src/decode/parser.ts b/packages/toon/src/decode/parser.ts index fbbb4d9..847f32c 100644 --- a/packages/toon/src/decode/parser.ts +++ b/packages/toon/src/decode/parser.ts @@ -9,13 +9,33 @@ export function parseArrayHeaderLine( content: string, defaultDelimiter: Delimiter, ): { header: ArrayHeaderInfo, inlineValues?: string } | undefined { - // Don't match if the line starts with a quote (it's a quoted key, not an array) - if (content.trimStart().startsWith(DOUBLE_QUOTE)) { - return + const trimmed = content.trimStart() + + // Find the bracket segment, accounting for quoted keys that may contain brackets + let bracketStart = -1 + + // For quoted keys, find bracket after 
closing quote (not inside the quoted string) + if (trimmed.startsWith(DOUBLE_QUOTE)) { + const closingQuoteIndex = findClosingQuote(trimmed, 0) + if (closingQuoteIndex === -1) { + return + } + + const afterQuote = trimmed.slice(closingQuoteIndex + 1) + if (!afterQuote.startsWith(OPEN_BRACKET)) { + return + } + + // Calculate position in original content and find bracket after the quoted key + const leadingWhitespace = content.length - trimmed.length + const keyEndIndex = leadingWhitespace + closingQuoteIndex + 1 + bracketStart = content.indexOf(OPEN_BRACKET, keyEndIndex) + } + else { + // Unquoted key - find first bracket + bracketStart = content.indexOf(OPEN_BRACKET) } - // Find the bracket segment first - const bracketStart = content.indexOf(OPEN_BRACKET) if (bracketStart === -1) { return } @@ -44,7 +64,13 @@ export function parseArrayHeaderLine( return } - const key = bracketStart > 0 ? content.slice(0, bracketStart) : undefined + // Extract and parse the key (might be quoted) + let key: string | undefined + if (bracketStart > 0) { + const rawKey = content.slice(0, bracketStart).trim() + key = rawKey.startsWith(DOUBLE_QUOTE) ? 
parseStringLiteral(rawKey) : rawKey + } + const afterColon = content.slice(colonIndex + 1).trim() const bracketContent = content.slice(bracketStart + 1, bracketEnd) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 49aa280..c0e08a7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -102,8 +102,8 @@ importers: packages/toon: devDependencies: '@toon-format/spec': - specifier: ^1.3.0 - version: 1.3.0 + specifier: ^1.3.3 + version: 1.3.3 packages: @@ -833,8 +833,8 @@ packages: peerDependencies: eslint: '>=9.0.0' - '@toon-format/spec@1.3.0': - resolution: {integrity: sha512-uZrR+aML7i6K1Lt8pedx24rxLKZbg2NjcDO/jVuBqjfQ/Is1kKHpyMMwCg88zQuXGQIgjogh7MbsNkmD5WNlxQ==} + '@toon-format/spec@1.3.3': + resolution: {integrity: sha512-AgOQGwv6EJUGj1zWjaSXMfFn3imsvwC3NHdaXLFmI6zF3dJ3LtrBzU4sg5meVJMO6bxgl4Wrl3/U/b53aDRSPA==} '@tybys/wasm-util@0.10.1': resolution: {integrity: sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==} @@ -3042,7 +3042,7 @@ snapshots: estraverse: 5.3.0 picomatch: 4.0.3 - '@toon-format/spec@1.3.0': {} + '@toon-format/spec@1.3.3': {} '@tybys/wasm-util@0.10.1': dependencies: