chore: more work on benchmarks

This commit is contained in:
Johann Schopplich
2025-11-06 15:51:31 +01:00
parent bc711ccecf
commit a9d52fc69b
15 changed files with 1647 additions and 213 deletions

View File

@@ -34,7 +34,7 @@ Results are saved to `results/token-efficiency.md`.
Tests how well LLMs can answer questions about data in different formats (TOON, JSON, JSON compact, XML, YAML, CSV): Tests how well LLMs can answer questions about data in different formats (TOON, JSON, JSON compact, XML, YAML, CSV):
1. Generate ~150-160 questions across 6 datasets (CSV only included for datasets with flat/tabular structure) 1. Generate ~200 questions across 6 datasets (CSV only included for datasets with flat/tabular structure)
2. Convert each dataset to all supported formats 2. Convert each dataset to all supported formats
3. Query each LLM with formatted data + question 3. Query each LLM with formatted data + question
4. Validate answers using `gpt-5-nano` as judge 4. Validate answers using `gpt-5-nano` as judge

File diff suppressed because it is too large Load Diff

View File

@@ -100,7 +100,7 @@ function generateTotalLines(
const csvStr = baselineFormat.tokens.toLocaleString('en-US').padStart(TOKEN_PADDING) const csvStr = baselineFormat.tokens.toLocaleString('en-US').padStart(TOKEN_PADDING)
lines.push(`csv ${csvBar} ${csvStr} tokens`) lines.push(`csv ${csvBar} ${csvStr} tokens`)
const overheadPercent = ((totalToonTokens - baselineFormat.tokens) / totalToonTokens) * 100 const overheadPercent = ((totalToonTokens - baselineFormat.tokens) / baselineFormat.tokens) * 100
const toonBar = createProgressBar(100, 100, PROGRESS_BAR_WIDTH, PROGRESS_BAR_CONFIG) const toonBar = createProgressBar(100, 100, PROGRESS_BAR_WIDTH, PROGRESS_BAR_CONFIG)
const toonStr = totalToonTokens.toLocaleString('en-US').padStart(TOKEN_PADDING) const toonStr = totalToonTokens.toLocaleString('en-US').padStart(TOKEN_PADDING)
lines.push(`toon ${toonBar} ${toonStr} tokens (+${overheadPercent.toFixed(1)}% vs CSV)`) lines.push(`toon ${toonBar} ${toonStr} tokens (+${overheadPercent.toFixed(1)}% vs CSV)`)
@@ -223,7 +223,7 @@ const flatCharts = flatOnlyDatasets
// TOON line with overhead vs CSV // TOON line with overhead vs CSV
const toonOverhead = toon.tokens - csv.tokens const toonOverhead = toon.tokens - csv.tokens
const toonOverheadPercent = (toonOverhead / toon.tokens) * 100 const toonOverheadPercent = (toonOverhead / csv.tokens) * 100
const toonBar = createProgressBar(100, 100, PROGRESS_BAR_WIDTH, PROGRESS_BAR_CONFIG) const toonBar = createProgressBar(100, 100, PROGRESS_BAR_WIDTH, PROGRESS_BAR_CONFIG)
const toonStr = toon.tokens.toLocaleString('en-US') const toonStr = toon.tokens.toLocaleString('en-US')
const toonVsCSV = toonOverheadPercent >= 0 const toonVsCSV = toonOverheadPercent >= 0

View File

@@ -101,10 +101,10 @@ export const QUESTION_THRESHOLDS = {
*/ */
export const QUESTION_LIMITS = { export const QUESTION_LIMITS = {
tabular: { tabular: {
fieldRetrieval: 20, fieldRetrieval: 14,
aggregationDepartments: 6, aggregationDepartments: 4,
filteringMultiConditionDepartments: 6, filteringMultiConditionDepartments: 5,
filteringExperience: 4, filteringExperience: 3,
filteringDepartmentExp: 3, filteringDepartmentExp: 3,
filteringDepartmentActive: 3, filteringDepartmentActive: 3,
}, },
@@ -116,7 +116,7 @@ export const QUESTION_LIMITS = {
filteringStatusAndItems: 3, filteringStatusAndItems: 3,
}, },
analytics: { analytics: {
fieldRetrievalDates: 13, fieldRetrievalDates: 9,
}, },
github: { github: {
fieldRetrievalRepos: 11, fieldRetrievalRepos: 11,
@@ -125,12 +125,12 @@ export const QUESTION_LIMITS = {
}, },
eventLogs: { eventLogs: {
fieldRetrieval: 10, fieldRetrieval: 10,
aggregationEndpoints: 3, aggregationEndpoints: 4,
filteringLevelAndStatus: 2, filteringLevelAndStatus: 3,
filteringEndpointAndStatus: 2, filteringEndpointAndStatus: 3,
}, },
nestedConfig: { nestedConfig: {
fieldRetrieval: 5, fieldRetrieval: 10,
filteringComplex: 2, filteringComplex: 6,
}, },
} as const } as const

View File

@@ -5,67 +5,6 @@ import githubRepos from '../data/github-repos.json' with { type: 'json' }
// Seed for reproducibility // Seed for reproducibility
faker.seed(12345) faker.seed(12345)
/**
 * Compute the percentage of arrays inside a value that are tabular
 *
 * @remarks
 * Every array reachable from `data` is counted, whether it is the root value,
 * an object property, or an element of another array. An array counts as
 * tabular (eligible for TOON's tabular format) when it is non-empty, all of
 * its elements are non-null, non-array objects, the first element has at
 * least one key, every element has exactly the same keys as the first, and
 * every value under those keys is `null`, a string, a number, or a boolean.
 *
 * @param data - Arbitrary value to inspect
 * @returns Percentage of tabular arrays rounded to the nearest integer, or `0` when no array is found
 */
export function calculateTabularEligibility(data: unknown): number {
  const primitiveTypeNames = ['string', 'number', 'boolean']

  // A row candidate is any non-null object that is not itself an array
  const isRecord = (candidate: unknown): boolean =>
    candidate !== null && typeof candidate === 'object' && !Array.isArray(candidate)

  const qualifiesAsTabular = (rows: unknown[]): boolean => {
    if (rows.length === 0 || !rows.every(row => isRecord(row)))
      return false

    // The first row defines the column set all other rows must match
    const columns = Object.keys(rows[0] as Record<string, unknown>)
    if (columns.length === 0)
      return false

    const records = rows as Record<string, unknown>[]
    return records.every((row) => {
      const rowKeys = Object.keys(row)
      const sameShape = rowKeys.length === columns.length
        && columns.every(column => rowKeys.includes(column))

      // Nested objects or arrays in any cell disqualify the whole array
      return sameShape && columns.every((column) => {
        const cell = row[column]
        return cell === null || primitiveTypeNames.includes(typeof cell)
      })
    })
  }

  // Returns how many arrays were seen in `node` (inclusive) and how many of them are tabular
  const countArrays = (node: unknown): { seen: number, tabular: number } => {
    const tally = { seen: 0, tabular: 0 }
    if (node === null || typeof node !== 'object')
      return tally

    let children: unknown[]
    if (Array.isArray(node)) {
      tally.seen = 1
      tally.tabular = qualifiesAsTabular(node) ? 1 : 0
      children = node
    }
    else {
      children = Object.values(node)
    }

    for (const child of children) {
      const nested = countArrays(child)
      tally.seen += nested.seen
      tally.tabular += nested.tabular
    }
    return tally
  }

  const { seen, tabular } = countArrays(data)
  if (seen === 0)
    return 0
  return Math.round((tabular / seen) * 100)
}
/** /**
* Employee record structure for tabular dataset * Employee record structure for tabular dataset
*/ */
@@ -275,7 +214,7 @@ const tabularDataset: Dataset = {
metadata: { metadata: {
supportsCSV: true, supportsCSV: true,
structureClass: 'uniform', structureClass: 'uniform',
tabularEligibility: 100, tabularEligibility: 100, // All arrays contain uniform objects with primitive values only
}, },
} }
@@ -285,38 +224,21 @@ const tabularDataset: Dataset = {
const PRODUCT_NAMES = ['Wireless Mouse', 'USB Cable', 'Laptop Stand', 'Keyboard', 'Webcam', 'Headphones', 'Monitor', 'Desk Lamp'] as const const PRODUCT_NAMES = ['Wireless Mouse', 'USB Cable', 'Laptop Stand', 'Keyboard', 'Webcam', 'Headphones', 'Monitor', 'Desk Lamp'] as const
const ORDER_STATUSES = ['pending', 'processing', 'shipped', 'delivered', 'cancelled'] as const const ORDER_STATUSES = ['pending', 'processing', 'shipped', 'delivered', 'cancelled'] as const
/**
 * Tuning values read by `generateOrders` when synthesizing order records
 */
const ORDER_CONSTANTS = {
// Customer ids are computed as `(i % CUSTOMER_ID_MOD) + 1`, so they cycle through 1..20
CUSTOMER_ID_MOD: 20,
// Inclusive bounds for the random number of line items per order
MIN_ITEMS: 1,
MAX_ITEMS: 4,
// Bounds for the random unit price of an item (generated with 2 fraction digits)
MIN_ITEM_PRICE: 9.99,
MAX_ITEM_PRICE: 199.99,
// Inclusive bounds for the random quantity of each item
MIN_ITEM_QUANTITY: 1,
MAX_ITEM_QUANTITY: 5,
// Number of random alphanumeric characters that follow the `SKU-` prefix
SKU_LENGTH: 6,
// Width the numeric part of `ORD-` ids is zero-padded to
ORDER_ID_PADDING: 4,
// `days` window passed to `faker.date.recent` for the order date
RECENT_DAYS: 90,
// Multiplied by the subtotal to compute tax (8%)
TAX_RATE: 0.08,
} as const
function generateOrders(count: number): { orders: Order[] } { function generateOrders(count: number): { orders: Order[] } {
return { return {
orders: Array.from({ length: count }, (_, i) => { orders: Array.from({ length: count }, (_, i) => {
const customerId = (i % ORDER_CONSTANTS.CUSTOMER_ID_MOD) + 1 const customerId = (i % 20) + 1 // Rotate through 20 customers
const itemCount = faker.number.int({ min: ORDER_CONSTANTS.MIN_ITEMS, max: ORDER_CONSTANTS.MAX_ITEMS }) const itemCount = faker.number.int({ min: 1, max: 4 }) // 1-4 items per order
const items = Array.from({ length: itemCount }, (_, j) => { const items = Array.from({ length: itemCount }, (_, j) => {
const price = faker.number.float({ const price = faker.number.float({
min: ORDER_CONSTANTS.MIN_ITEM_PRICE, min: 9.99,
max: ORDER_CONSTANTS.MAX_ITEM_PRICE, max: 199.99,
fractionDigits: 2, fractionDigits: 2,
}) })
const quantity = faker.number.int({ const quantity = faker.number.int({ min: 1, max: 5 })
min: ORDER_CONSTANTS.MIN_ITEM_QUANTITY,
max: ORDER_CONSTANTS.MAX_ITEM_QUANTITY,
})
return { return {
sku: `SKU-${faker.string.alphanumeric({ length: ORDER_CONSTANTS.SKU_LENGTH }).toUpperCase()}`, sku: `SKU-${faker.string.alphanumeric({ length: 6 }).toUpperCase()}`,
name: PRODUCT_NAMES[j % PRODUCT_NAMES.length]!, name: PRODUCT_NAMES[j % PRODUCT_NAMES.length]!,
quantity, quantity,
price, price,
@@ -324,11 +246,11 @@ function generateOrders(count: number): { orders: Order[] } {
}) })
const subtotal = Number(items.reduce((sum, item) => sum + (item.price * item.quantity), 0).toFixed(2)) const subtotal = Number(items.reduce((sum, item) => sum + (item.price * item.quantity), 0).toFixed(2))
const tax = Number((subtotal * ORDER_CONSTANTS.TAX_RATE).toFixed(2)) const tax = Number((subtotal * 0.08).toFixed(2)) // 8% tax rate
const total = Number((subtotal + tax).toFixed(2)) const total = Number((subtotal + tax).toFixed(2))
return { return {
orderId: `ORD-${String(i + 1).padStart(ORDER_CONSTANTS.ORDER_ID_PADDING, '0')}`, orderId: `ORD-${String(i + 1).padStart(4, '0')}`,
customer: { customer: {
id: customerId, id: customerId,
name: faker.person.fullName(), name: faker.person.fullName(),
@@ -340,7 +262,7 @@ function generateOrders(count: number): { orders: Order[] } {
tax, tax,
total, total,
status: ORDER_STATUSES[i % ORDER_STATUSES.length]!, status: ORDER_STATUSES[i % ORDER_STATUSES.length]!,
orderDate: faker.date.recent({ days: ORDER_CONSTANTS.RECENT_DAYS }).toISOString().split('T')[0], orderDate: faker.date.recent({ days: 90 }).toISOString().split('T')[0],
} }
}), }),
} }
@@ -359,7 +281,7 @@ const nestedDataset: Dataset = {
metadata: { metadata: {
supportsCSV: false, supportsCSV: false,
structureClass: 'nested', structureClass: 'nested',
tabularEligibility: 33, // orders array is not tabular, but items arrays within are tabularEligibility: 33, // Top-level orders array has nested objects (not tabular), but nested items arrays are tabular
}, },
} }
@@ -376,7 +298,7 @@ const analyticsDataset: Dataset = {
metadata: { metadata: {
supportsCSV: true, supportsCSV: true,
structureClass: 'uniform', structureClass: 'uniform',
tabularEligibility: 100, tabularEligibility: 100, // Uniform time-series records with consistent primitive fields
}, },
} }
@@ -395,7 +317,7 @@ const githubDataset: Dataset = {
metadata: { metadata: {
supportsCSV: true, supportsCSV: true,
structureClass: 'uniform', structureClass: 'uniform',
tabularEligibility: 100, tabularEligibility: 100, // Repository array contains uniform objects with primitive values
}, },
} }
@@ -597,7 +519,7 @@ const eventLogsDataset: Dataset = {
metadata: { metadata: {
supportsCSV: false, supportsCSV: false,
structureClass: 'semi-uniform', structureClass: 'semi-uniform',
tabularEligibility: 50, // ~50% of logs have nested error objects tabularEligibility: 50, // Top-level logs array is tabular, but ~50% have nested optional error objects
}, },
} }
@@ -614,7 +536,7 @@ const nestedConfigDataset: Dataset = {
metadata: { metadata: {
supportsCSV: false, supportsCSV: false,
structureClass: 'deep', structureClass: 'deep',
tabularEligibility: 0, // Highly nested, minimal tabular arrays tabularEligibility: 0, // Deeply nested configuration with no tabular arrays
}, },
} }
@@ -642,7 +564,7 @@ export const TOKEN_EFFICIENCY_DATASETS: Dataset[] = [
metadata: { metadata: {
supportsCSV: true, supportsCSV: true,
structureClass: 'uniform', structureClass: 'uniform',
tabularEligibility: 100, tabularEligibility: 100, // All arrays contain uniform objects with primitive values only
}, },
}, },
// Nested: 500 orders // Nested: 500 orders
@@ -653,7 +575,7 @@ export const TOKEN_EFFICIENCY_DATASETS: Dataset[] = [
metadata: { metadata: {
supportsCSV: false, supportsCSV: false,
structureClass: 'nested', structureClass: 'nested',
tabularEligibility: 33, tabularEligibility: 33, // Top-level orders array has nested objects (not tabular), but nested items arrays are tabular
}, },
}, },
// Analytics: 365 days // Analytics: 365 days
@@ -664,7 +586,7 @@ export const TOKEN_EFFICIENCY_DATASETS: Dataset[] = [
metadata: { metadata: {
supportsCSV: true, supportsCSV: true,
structureClass: 'uniform', structureClass: 'uniform',
tabularEligibility: 100, tabularEligibility: 100, // Uniform time-series records with consistent primitive fields
}, },
}, },
// GitHub: 100 repos (same as accuracy) // GitHub: 100 repos (same as accuracy)
@@ -677,7 +599,7 @@ export const TOKEN_EFFICIENCY_DATASETS: Dataset[] = [
metadata: { metadata: {
supportsCSV: false, supportsCSV: false,
structureClass: 'semi-uniform', structureClass: 'semi-uniform',
tabularEligibility: 50, tabularEligibility: 50, // Top-level logs array is tabular, but ~50% have nested optional error objects
}, },
}, },
// Nested config: 1 config (same as accuracy) // Nested config: 1 config (same as accuracy)

View File

@@ -4,7 +4,6 @@ import { anthropic } from '@ai-sdk/anthropic'
import { google } from '@ai-sdk/google' import { google } from '@ai-sdk/google'
import { openai } from '@ai-sdk/openai' import { openai } from '@ai-sdk/openai'
import { xai } from '@ai-sdk/xai' import { xai } from '@ai-sdk/xai'
import * as prompts from '@clack/prompts'
import { generateText } from 'ai' import { generateText } from 'ai'
/** /**
@@ -102,7 +101,6 @@ Is the actual answer correct? Consider:
Respond with only "YES" or "NO". Respond with only "YES" or "NO".
`.trim() `.trim()
try {
const { text } = await generateText({ const { text } = await generateText({
model: models.find(m => m.modelId === 'gpt-5-nano')!, model: models.find(m => m.modelId === 'gpt-5-nano')!,
prompt, prompt,
@@ -110,9 +108,3 @@ Respond with only "YES" or "NO".
return text.trim().toUpperCase() === 'YES' return text.trim().toUpperCase() === 'YES'
} }
catch (error) {
prompts.log.error(`Validation error: ${error}`)
// Fallback to simple string comparison
return actual.toLowerCase().trim() === expected.toLowerCase().trim()
}
}

View File

@@ -1,7 +1,7 @@
import type { AnalyticsMetric } from '../datasets' import type { AnalyticsMetric } from '../datasets'
import type { Question } from '../types' import type { Question } from '../types'
import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants' import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants'
import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils' import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils'
/** /**
* Generate analytics (website metrics) questions * Generate analytics (website metrics) questions
@@ -9,9 +9,6 @@ import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } fr
export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: () => string): Question[] { export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: () => string): Question[] {
const questions: Question[] = [] const questions: Question[] = []
if (metrics.length === 0)
return questions
// Field retrieval: date-based metrics // Field retrieval: date-based metrics
const metricFieldGenerators: Array<(metric: AnalyticsMetric, getId: () => string) => Question> = [ const metricFieldGenerators: Array<(metric: AnalyticsMetric, getId: () => string) => Question> = [
(metric, getId) => new QuestionBuilder() (metric, getId) => new QuestionBuilder()
@@ -99,7 +96,7 @@ export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: ()
// Aggregation: high views/conversions // Aggregation: high views/conversions
for (const threshold of QUESTION_THRESHOLDS.analytics.views) { for (const threshold of QUESTION_THRESHOLDS.analytics.views) {
const count = countByPredicate(metrics, m => m.views > threshold) const count = metrics.filter(m => m.views > threshold).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -112,7 +109,7 @@ export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: ()
} }
for (const threshold of QUESTION_THRESHOLDS.analytics.conversions) { for (const threshold of QUESTION_THRESHOLDS.analytics.conversions) {
const count = countByPredicate(metrics, m => m.conversions > threshold) const count = metrics.filter(m => m.conversions > threshold).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -126,10 +123,9 @@ export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: ()
// Filtering: multi-condition (views AND revenue) // Filtering: multi-condition (views AND revenue)
for (const threshold of QUESTION_THRESHOLDS.analytics.viewsForFiltering) { for (const threshold of QUESTION_THRESHOLDS.analytics.viewsForFiltering) {
const count = countByPredicate( const count = metrics.filter(
metrics,
m => m.views > threshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForFiltering, m => m.views > threshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForFiltering,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -143,10 +139,9 @@ export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: ()
// Filtering: revenue thresholds // Filtering: revenue thresholds
for (const threshold of QUESTION_THRESHOLDS.analytics.revenueThresholds) { for (const threshold of QUESTION_THRESHOLDS.analytics.revenueThresholds) {
const count = countByPredicate( const count = metrics.filter(
metrics,
m => m.revenue > threshold && m.views > QUESTION_THRESHOLDS.analytics.viewsThresholdForRevenue, m => m.revenue > threshold && m.views > QUESTION_THRESHOLDS.analytics.viewsThresholdForRevenue,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -160,10 +155,9 @@ export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: ()
// Filtering: clicks and conversions // Filtering: clicks and conversions
for (const threshold of QUESTION_THRESHOLDS.analytics.clicksForFiltering) { for (const threshold of QUESTION_THRESHOLDS.analytics.clicksForFiltering) {
const count = countByPredicate( const count = metrics.filter(
metrics,
m => m.clicks > threshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForClickFiltering, m => m.clicks > threshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForClickFiltering,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -177,10 +171,9 @@ export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: ()
// Filtering: revenue and bounce rate // Filtering: revenue and bounce rate
for (const threshold of QUESTION_THRESHOLDS.analytics.revenueForBounceRate) { for (const threshold of QUESTION_THRESHOLDS.analytics.revenueForBounceRate) {
const count = countByPredicate( const count = metrics.filter(
metrics,
m => m.revenue > threshold && m.bounceRate < QUESTION_THRESHOLDS.analytics.bounceRateThreshold, m => m.revenue > threshold && m.bounceRate < QUESTION_THRESHOLDS.analytics.bounceRateThreshold,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())

View File

@@ -1,7 +1,7 @@
import type { EventLog } from '../datasets' import type { EventLog } from '../datasets'
import type { Question } from '../types' import type { Question } from '../types'
import { QUESTION_LIMITS } from '../constants' import { QUESTION_LIMITS } from '../constants'
import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils' import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils'
/** /**
* Generate event log questions * Generate event log questions
@@ -9,9 +9,6 @@ import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } fr
export function generateEventLogsQuestions(logs: EventLog[], getId: () => string): Question[] { export function generateEventLogsQuestions(logs: EventLog[], getId: () => string): Question[] {
const questions: Question[] = [] const questions: Question[] = []
if (logs.length === 0)
return questions
// Field retrieval: log metadata // Field retrieval: log metadata
const logFieldGenerators: Array<(log: EventLog, getId: () => string) => Question> = [ const logFieldGenerators: Array<(log: EventLog, getId: () => string) => Question> = [
(log, getId) => new QuestionBuilder() (log, getId) => new QuestionBuilder()
@@ -76,7 +73,7 @@ export function generateEventLogsQuestions(logs: EventLog[], getId: () => string
// Aggregation: by level // Aggregation: by level
const levels = [...new Set(logs.map(l => l.level))] const levels = [...new Set(logs.map(l => l.level))]
for (const level of levels) { for (const level of levels) {
const count = countByPredicate(logs, l => l.level === level) const count = logs.filter(l => l.level === level).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -91,7 +88,7 @@ export function generateEventLogsQuestions(logs: EventLog[], getId: () => string
// Aggregation: by endpoint // Aggregation: by endpoint
const endpoints = [...new Set(logs.map(l => l.endpoint))] const endpoints = [...new Set(logs.map(l => l.endpoint))]
for (const endpoint of endpoints.slice(0, QUESTION_LIMITS.eventLogs.aggregationEndpoints)) { for (const endpoint of endpoints.slice(0, QUESTION_LIMITS.eventLogs.aggregationEndpoints)) {
const count = countByPredicate(logs, l => l.endpoint === endpoint) const count = logs.filter(l => l.endpoint === endpoint).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -104,8 +101,8 @@ export function generateEventLogsQuestions(logs: EventLog[], getId: () => string
} }
// Aggregation: by status code range // Aggregation: by status code range
const errorCount = countByPredicate(logs, l => l.statusCode >= 400) const errorCount = logs.filter(l => l.statusCode >= 400).length
const successCount = countByPredicate(logs, l => l.statusCode >= 200 && l.statusCode < 300) const successCount = logs.filter(l => l.statusCode >= 200 && l.statusCode < 300).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
@@ -124,12 +121,21 @@ export function generateEventLogsQuestions(logs: EventLog[], getId: () => string
.build(), .build(),
) )
// Aggregation: retryable errors
const retryableErrorCount = logs.filter(l => l.error?.retryable === true).length
questions.push(
new QuestionBuilder()
.id(getId())
.prompt('How many log entries have a retryable error?')
.groundTruth(String(retryableErrorCount))
.type('aggregation')
.dataset('event-logs')
.build(),
)
// Filtering: multi-condition (level AND status) // Filtering: multi-condition (level AND status)
for (const level of levels.slice(0, QUESTION_LIMITS.eventLogs.filteringLevelAndStatus)) { for (const level of levels.slice(0, QUESTION_LIMITS.eventLogs.filteringLevelAndStatus)) {
const count = countByPredicate( const count = logs.filter(l => l.level === level && l.statusCode >= 400).length
logs,
l => l.level === level && l.statusCode >= 400,
)
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -143,10 +149,7 @@ export function generateEventLogsQuestions(logs: EventLog[], getId: () => string
// Filtering: endpoint AND status // Filtering: endpoint AND status
for (const endpoint of endpoints.slice(0, QUESTION_LIMITS.eventLogs.filteringEndpointAndStatus)) { for (const endpoint of endpoints.slice(0, QUESTION_LIMITS.eventLogs.filteringEndpointAndStatus)) {
const count = countByPredicate( const count = logs.filter(l => l.endpoint === endpoint && l.statusCode >= 500).length
logs,
l => l.endpoint === endpoint && l.statusCode >= 500,
)
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -158,5 +161,19 @@ export function generateEventLogsQuestions(logs: EventLog[], getId: () => string
) )
} }
// Filtering: endpoint AND retryable error
for (const endpoint of endpoints.slice(0, QUESTION_LIMITS.eventLogs.filteringEndpointAndStatus)) {
const count = logs.filter(l => l.endpoint === endpoint && l.error?.retryable === true).length
questions.push(
new QuestionBuilder()
.id(getId())
.prompt(`How many log entries for endpoint "${endpoint}" have a retryable error?`)
.groundTruth(String(count))
.type('filtering')
.dataset('event-logs')
.build(),
)
}
return questions return questions
} }

View File

@@ -1,7 +1,7 @@
import type { Repository } from '../datasets' import type { Repository } from '../datasets'
import type { Question } from '../types' import type { Question } from '../types'
import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants' import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants'
import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils' import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils'
/** /**
* Generate GitHub repository questions * Generate GitHub repository questions
@@ -9,9 +9,6 @@ import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } fr
export function generateGithubQuestions(repos: Repository[], getId: () => string): Question[] { export function generateGithubQuestions(repos: Repository[], getId: () => string): Question[] {
const questions: Question[] = [] const questions: Question[] = []
if (repos.length === 0)
return questions
// Field retrieval: repository metadata // Field retrieval: repository metadata
const repoFieldGenerators: Array<(repo: Repository, getId: () => string) => Question> = [ const repoFieldGenerators: Array<(repo: Repository, getId: () => string) => Question> = [
(repo, getId) => new QuestionBuilder() (repo, getId) => new QuestionBuilder()
@@ -92,7 +89,7 @@ export function generateGithubQuestions(repos: Repository[], getId: () => string
// Aggregation: by default branch // Aggregation: by default branch
const branches = [...new Set(repos.map(r => r.defaultBranch))] const branches = [...new Set(repos.map(r => r.defaultBranch))]
for (const branch of branches.slice(0, QUESTION_LIMITS.github.aggregationBranches)) { for (const branch of branches.slice(0, QUESTION_LIMITS.github.aggregationBranches)) {
const count = countByPredicate(repos, r => r.defaultBranch === branch) const count = repos.filter(r => r.defaultBranch === branch).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -106,7 +103,7 @@ export function generateGithubQuestions(repos: Repository[], getId: () => string
// Aggregation: high star counts // Aggregation: high star counts
for (const threshold of QUESTION_THRESHOLDS.github.stars) { for (const threshold of QUESTION_THRESHOLDS.github.stars) {
const count = countByPredicate(repos, r => r.stars > threshold) const count = repos.filter(r => r.stars > threshold).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -120,7 +117,7 @@ export function generateGithubQuestions(repos: Repository[], getId: () => string
// Aggregation: high fork counts // Aggregation: high fork counts
for (const threshold of QUESTION_THRESHOLDS.github.forks) { for (const threshold of QUESTION_THRESHOLDS.github.forks) {
const count = countByPredicate(repos, r => r.forks > threshold) const count = repos.filter(r => r.forks > threshold).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -134,7 +131,7 @@ export function generateGithubQuestions(repos: Repository[], getId: () => string
// Aggregation: high watcher counts // Aggregation: high watcher counts
for (const threshold of QUESTION_THRESHOLDS.github.watchers) { for (const threshold of QUESTION_THRESHOLDS.github.watchers) {
const count = countByPredicate(repos, r => r.watchers > threshold) const count = repos.filter(r => r.watchers > threshold).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -148,10 +145,9 @@ export function generateGithubQuestions(repos: Repository[], getId: () => string
// Filtering: multi-condition (stars AND forks) // Filtering: multi-condition (stars AND forks)
for (const combo of QUESTION_THRESHOLDS.github.starForkCombinations.slice(0, QUESTION_LIMITS.github.filteringStarsAndForks)) { for (const combo of QUESTION_THRESHOLDS.github.starForkCombinations.slice(0, QUESTION_LIMITS.github.filteringStarsAndForks)) {
const count = countByPredicate( const count = repos.filter(
repos,
r => r.stars > combo.stars && r.forks > combo.forks, r => r.stars > combo.stars && r.forks > combo.forks,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -165,10 +161,9 @@ export function generateGithubQuestions(repos: Repository[], getId: () => string
// Filtering: stars AND watchers // Filtering: stars AND watchers
for (const combo of QUESTION_THRESHOLDS.github.starWatcherCombinations) { for (const combo of QUESTION_THRESHOLDS.github.starWatcherCombinations) {
const count = countByPredicate( const count = repos.filter(
repos,
r => r.stars > combo.stars && r.watchers > combo.watchers, r => r.stars > combo.stars && r.watchers > combo.watchers,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())

View File

@@ -10,10 +10,9 @@ import { generateTabularQuestions } from './tabular'
import { createIdGenerator } from './utils' import { createIdGenerator } from './utils'
/** /**
* Generate all questions from datasets * Generate ~200 questions from all datasets
* *
* @remarks * @remarks
* Generates ~150-160 questions across different question types and datasets:
* - Field Retrieval: Direct field access with no computation * - Field Retrieval: Direct field access with no computation
* Examples: "What is X's salary?", "What is the status of order Y?" * Examples: "What is X's salary?", "What is the status of order Y?"
* - Aggregation: Counts, sums, averages, min/max operations (including single-condition filters) * - Aggregation: Counts, sums, averages, min/max operations (including single-condition filters)

View File

@@ -34,6 +34,26 @@ export function generateNestedConfigQuestions(config: NestedConfig | undefined,
prompt: 'What is the session duration?', prompt: 'What is the session duration?',
groundTruth: String(config.authentication.session.duration), groundTruth: String(config.authentication.session.duration),
}, },
{
prompt: 'What is the minimum connection pool size?',
groundTruth: String(config.database.pool.min),
},
{
prompt: 'What is the connection pool idle timeout?',
groundTruth: String(config.database.pool.idleTimeout),
},
{
prompt: 'What is the database name?',
groundTruth: config.database.name,
},
{
prompt: 'What is the session refresh threshold?',
groundTruth: String(config.authentication.session.refreshThreshold),
},
{
prompt: 'What is the version in the configuration?',
groundTruth: config.version,
},
] ]
for (const q of fieldRetrievalQuestions.slice(0, QUESTION_LIMITS.nestedConfig.fieldRetrieval)) { for (const q of fieldRetrievalQuestions.slice(0, QUESTION_LIMITS.nestedConfig.fieldRetrieval)) {
@@ -93,6 +113,18 @@ export function generateNestedConfigQuestions(config: NestedConfig | undefined,
.build(), .build(),
) )
// Aggregation: providers with admin scope
const adminScopeProviderCount = config.authentication.providers.filter(p => p.scopes.includes('admin')).length
questions.push(
new QuestionBuilder()
.id(getId())
.prompt('How many authentication providers include the "admin" scope?')
.groundTruth(String(adminScopeProviderCount))
.type('aggregation')
.dataset('nested-config')
.build(),
)
// Aggregation: feature flag details // Aggregation: feature flag details
const enabledFeatures = Object.entries(config.features).filter(([_, f]) => f.enabled).length const enabledFeatures = Object.entries(config.features).filter(([_, f]) => f.enabled).length
questions.push( questions.push(
@@ -117,6 +149,67 @@ export function generateNestedConfigQuestions(config: NestedConfig | undefined,
.build(), .build(),
) )
// Aggregation: additional nested counts
// All counts are computed before any question is built.
const totalPermissions = Object.values(config.permissions.roles)
  .reduce((runningTotal, role) => runningTotal + role.permissions.length, 0)
const distinctPermissions = new Set(
  Object.values(config.permissions.roles).flatMap(role => role.permissions),
).size
const distinctScopes = new Set(
  config.authentication.providers.flatMap(provider => provider.scopes),
).size
const totalVariants = Object.values(config.features)
  .reduce((runningTotal, feature) => runningTotal + feature.variants.length, 0)
const highPriorityReplicas = config.database.replicas
  .filter(replica => replica.priority > 2)
  .length
const featuresWithHighRollout = Object.values(config.features)
  .filter(feature => feature.rollout > 50)
  .length
const groupsWithMultipleRoles = Object.values(config.permissions.groups)
  .filter(group => group.roles.length > 1)
  .length

// Prompt/count pairs, listed in the order the questions are emitted.
const nestedCountQuestions: Array<[string, number]> = [
  ['What is the total number of permissions across all roles?', totalPermissions],
  ['How many distinct permissions are defined across all roles?', distinctPermissions],
  ['How many distinct scopes are defined across all authentication providers?', distinctScopes],
  ['What is the total number of variants across all feature flags?', totalVariants],
  ['How many database replicas have a priority greater than 2?', highPriorityReplicas],
  ['How many feature flags have a rollout percentage greater than 50?', featuresWithHighRollout],
  ['How many groups have more than one role assigned?', groupsWithMultipleRoles],
]

// map() builds every question (one getId() call each, in table order) before the
// single push appends them all.
questions.push(
  ...nestedCountQuestions.map(([prompt, count]) =>
    new QuestionBuilder()
      .id(getId())
      .prompt(prompt)
      .groundTruth(String(count))
      .type('aggregation')
      .dataset('nested-config')
      .build(),
  ),
)
// Filtering: complex multi-condition queries // Filtering: complex multi-condition queries
const filteringQuestions = [ const filteringQuestions = [
{ {
@@ -129,6 +222,31 @@ export function generateNestedConfigQuestions(config: NestedConfig | undefined,
groundTruth: String(Object.entries(config.permissions.groups) groundTruth: String(Object.entries(config.permissions.groups)
.filter(([_, g]) => g.roles.includes('admin')).length), .filter(([_, g]) => g.roles.includes('admin')).length),
}, },
// Additional multi-condition filtering entries; every groundTruth is a count of
// matching items, wrapped in String().
// NOTE: the loop that follows this array slices it to
// QUESTION_LIMITS.nestedConfig.filteringComplex entries, so entries near the end
// are only emitted when that limit covers them.
// Replicas: both conditions must hold (priority > 2 AND port === 5432)
{
prompt: 'How many database replicas have priority greater than 2 and port 5432?',
groundTruth: String(config.database.replicas
.filter(r => r.priority > 2 && r.port === 5432).length),
},
// Providers: strictly more than 2 scopes
{
prompt: 'How many authentication providers have more than 2 scopes?',
groundTruth: String(config.authentication.providers
.filter(p => p.scopes.length > 2).length),
},
// Roles: "at least 5" is inclusive (>= 5)
{
prompt: 'How many roles have at least 5 permissions?',
groundTruth: String(Object.values(config.permissions.roles)
.filter(r => r.permissions.length >= 5).length),
},
// Features: disabled AND rollout strictly below 25
{
prompt: 'How many feature flags are disabled with rollout less than 25%?',
groundTruth: String(Object.values(config.features)
.filter(f => !f.enabled && f.rollout < 25).length),
},
// Features: enabled AND "at least 2" variants (inclusive, >= 2)
{
prompt: 'How many enabled features have at least 2 variants?',
groundTruth: String(Object.values(config.features)
.filter(f => f.enabled && f.variants.length >= 2).length),
},
] ]
for (const q of filteringQuestions.slice(0, QUESTION_LIMITS.nestedConfig.filteringComplex)) { for (const q of filteringQuestions.slice(0, QUESTION_LIMITS.nestedConfig.filteringComplex)) {

View File

@@ -1,7 +1,7 @@
import type { Order } from '../datasets' import type { Order } from '../datasets'
import type { Question } from '../types' import type { Question } from '../types'
import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants' import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants'
import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils' import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils'
/** /**
* Generate nested (orders) questions * Generate nested (orders) questions
@@ -9,9 +9,6 @@ import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } fr
export function generateNestedQuestions(orders: Order[], getId: () => string): Question[] { export function generateNestedQuestions(orders: Order[], getId: () => string): Question[] {
const questions: Question[] = [] const questions: Question[] = []
if (orders.length === 0)
return questions
// Field retrieval: order totals and statuses // Field retrieval: order totals and statuses
const orderFieldGenerators: Array<(order: Order, getId: () => string) => Question> = [ const orderFieldGenerators: Array<(order: Order, getId: () => string) => Question> = [
(order, getId) => new QuestionBuilder() (order, getId) => new QuestionBuilder()
@@ -89,7 +86,7 @@ export function generateNestedQuestions(orders: Order[], getId: () => string): Q
// Count by status // Count by status
const statuses = [...new Set(orders.map(o => o.status))] const statuses = [...new Set(orders.map(o => o.status))]
for (const status of statuses.slice(0, QUESTION_LIMITS.nested.aggregationStatuses)) { for (const status of statuses.slice(0, QUESTION_LIMITS.nested.aggregationStatuses)) {
const count = countByPredicate(orders, o => o.status === status) const count = orders.filter(o => o.status === status).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -134,7 +131,7 @@ export function generateNestedQuestions(orders: Order[], getId: () => string): Q
// Aggregation: high-value orders (single-condition filter) // Aggregation: high-value orders (single-condition filter)
for (const threshold of QUESTION_THRESHOLDS.nested.highValueOrders) { for (const threshold of QUESTION_THRESHOLDS.nested.highValueOrders) {
const count = countByPredicate(orders, o => o.total > threshold) const count = orders.filter(o => o.total > threshold).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -149,10 +146,9 @@ export function generateNestedQuestions(orders: Order[], getId: () => string): Q
// Filtering: multi-condition queries (status AND value) // Filtering: multi-condition queries (status AND value)
const orderStatuses = [...new Set(orders.map(o => o.status))] const orderStatuses = [...new Set(orders.map(o => o.status))]
for (const status of orderStatuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndValue)) { for (const status of orderStatuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndValue)) {
const count = countByPredicate( const count = orders.filter(
orders,
o => o.status === status && o.total > QUESTION_THRESHOLDS.nested.statusValueThreshold, o => o.status === status && o.total > QUESTION_THRESHOLDS.nested.statusValueThreshold,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -166,10 +162,9 @@ export function generateNestedQuestions(orders: Order[], getId: () => string): Q
// Filtering: status AND items count (multi-condition) // Filtering: status AND items count (multi-condition)
for (const status of orderStatuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndItems)) { for (const status of orderStatuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndItems)) {
const count = countByPredicate( const count = orders.filter(
orders,
o => o.status === status && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold, o => o.status === status && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -183,10 +178,9 @@ export function generateNestedQuestions(orders: Order[], getId: () => string): Q
// Filtering: total AND items count (multi-condition) // Filtering: total AND items count (multi-condition)
for (const threshold of QUESTION_THRESHOLDS.nested.totalThresholdsForItems) { for (const threshold of QUESTION_THRESHOLDS.nested.totalThresholdsForItems) {
const count = countByPredicate( const count = orders.filter(
orders,
o => o.total > threshold && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold, o => o.total > threshold && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())

View File

@@ -1,7 +1,7 @@
import type { Employee } from '../datasets' import type { Employee } from '../datasets'
import type { Question } from '../types' import type { Question } from '../types'
import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants' import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants'
import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils' import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils'
/** /**
* Generate tabular (employee) questions * Generate tabular (employee) questions
@@ -9,9 +9,6 @@ import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } fr
export function generateTabularQuestions(employees: Employee[], getId: () => string): Question[] { export function generateTabularQuestions(employees: Employee[], getId: () => string): Question[] {
const questions: Question[] = [] const questions: Question[] = []
if (employees.length === 0)
return questions
// Field retrieval: specific employees // Field retrieval: specific employees
const fieldGenerators: Array<(emp: Employee, getId: () => string) => Question> = [ const fieldGenerators: Array<(emp: Employee, getId: () => string) => Question> = [
(emp, getId) => new QuestionBuilder() (emp, getId) => new QuestionBuilder()
@@ -62,7 +59,7 @@ export function generateTabularQuestions(employees: Employee[], getId: () => str
// Aggregation: count by department // Aggregation: count by department
const departments = [...new Set(employees.map(e => e.department))] const departments = [...new Set(employees.map(e => e.department))]
for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.aggregationDepartments)) { for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.aggregationDepartments)) {
const count = countByPredicate(employees, e => e.department === dept) const count = employees.filter(e => e.department === dept).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -76,7 +73,7 @@ export function generateTabularQuestions(employees: Employee[], getId: () => str
// Aggregation: salary ranges (single-condition filters) // Aggregation: salary ranges (single-condition filters)
for (const threshold of QUESTION_THRESHOLDS.tabular.salaryRanges) { for (const threshold of QUESTION_THRESHOLDS.tabular.salaryRanges) {
const count = countByPredicate(employees, e => e.salary > threshold) const count = employees.filter(e => e.salary > threshold).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -91,8 +88,8 @@ export function generateTabularQuestions(employees: Employee[], getId: () => str
// Aggregation: totals and averages // Aggregation: totals and averages
const totalEmployees = employees.length const totalEmployees = employees.length
const avgSalary = Math.round(employees.reduce((sum, e) => sum + e.salary, 0) / totalEmployees) const avgSalary = Math.round(employees.reduce((sum, e) => sum + e.salary, 0) / totalEmployees)
const activeCount = countByPredicate(employees, e => e.active) const activeCount = employees.filter(e => e.active).length
const inactiveCount = countByPredicate(employees, e => !e.active) const inactiveCount = employees.filter(e => !e.active).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
@@ -127,10 +124,9 @@ export function generateTabularQuestions(employees: Employee[], getId: () => str
// Filtering: count by department with salary filter (multi-condition) // Filtering: count by department with salary filter (multi-condition)
for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringMultiConditionDepartments)) { for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringMultiConditionDepartments)) {
const count = countByPredicate( const count = employees.filter(
employees,
e => e.department === dept && e.salary > QUESTION_THRESHOLDS.tabular.departmentSalaryThreshold, e => e.department === dept && e.salary > QUESTION_THRESHOLDS.tabular.departmentSalaryThreshold,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -144,7 +140,7 @@ export function generateTabularQuestions(employees: Employee[], getId: () => str
// Filtering: active employees by experience (multi-condition) // Filtering: active employees by experience (multi-condition)
for (const exp of QUESTION_THRESHOLDS.tabular.experienceYears.slice(0, QUESTION_LIMITS.tabular.filteringExperience)) { for (const exp of QUESTION_THRESHOLDS.tabular.experienceYears.slice(0, QUESTION_LIMITS.tabular.filteringExperience)) {
const count = countByPredicate(employees, e => e.yearsExperience > exp && e.active) const count = employees.filter(e => e.yearsExperience > exp && e.active).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -158,10 +154,9 @@ export function generateTabularQuestions(employees: Employee[], getId: () => str
// Filtering: department by experience (multi-condition) // Filtering: department by experience (multi-condition)
for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringDepartmentExp)) { for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringDepartmentExp)) {
const count = countByPredicate( const count = employees.filter(
employees,
e => e.department === dept && e.yearsExperience > QUESTION_THRESHOLDS.tabular.departmentExperienceThreshold, e => e.department === dept && e.yearsExperience > QUESTION_THRESHOLDS.tabular.departmentExperienceThreshold,
) ).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())
@@ -175,7 +170,7 @@ export function generateTabularQuestions(employees: Employee[], getId: () => str
// Filtering: department by active status (multi-condition) // Filtering: department by active status (multi-condition)
for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringDepartmentActive)) { for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringDepartmentActive)) {
const count = countByPredicate(employees, e => e.department === dept && e.active) const count = employees.filter(e => e.department === dept && e.active).length
questions.push( questions.push(
new QuestionBuilder() new QuestionBuilder()
.id(getId()) .id(getId())

View File

@@ -61,14 +61,7 @@ export class QuestionBuilder {
} }
/** /**
* Helper: Count items matching a predicate * Rotate through question generators
*/
export function countByPredicate<T>(items: T[], predicate: (item: T) => boolean): number {
return items.filter(predicate).length
}
/**
* Helper: Rotate through question generators
*/ */
export function rotateQuestions<T>( export function rotateQuestions<T>(
items: T[], items: T[],

View File

@@ -15,7 +15,7 @@ export interface Question {
id: string id: string
prompt: string prompt: string
groundTruth: string groundTruth: string
type: 'field-retrieval' | 'aggregation' | 'filtering' | 'comparison' type: 'field-retrieval' | 'aggregation' | 'filtering'
dataset: string dataset: string
} }