chore(benchmarks): add structure-awareness questions

This commit is contained in:
Johann Schopplich
2025-11-07 09:03:51 +01:00
parent 853c3babea
commit 89df613059
13 changed files with 522 additions and 67 deletions

View File

@@ -4,37 +4,11 @@ import * as url from 'node:url'
/** Filesystem path obtained by resolving `../../` against this module's URL, i.e. two levels above the directory containing this file (presumably the repository root — confirm against the project layout) */
export const ROOT_DIR: string = url.fileURLToPath(new URL('../../', import.meta.url))
/** Filesystem path obtained by resolving `../` against this module's URL, i.e. the parent of the directory containing this file (presumably the benchmarks package directory — confirm against the project layout) */
export const BENCHMARKS_DIR: string = url.fileURLToPath(new URL('../', import.meta.url))
/**
 * Per-model RPM (requests per minute) limits, keyed by model identifier, to handle API quotas
 *
 * @remarks
 * Map a model to `undefined` to declare that it has no specific limit.
 * Entries are listed in ascending key order; the `keep-sorted` directive
 * below is presumably what enforces that ordering (confirm against the lint setup).
 */
/// keep-sorted
export const MODEL_RPM_LIMITS: Record<string, number | undefined> = {
'claude-haiku-4-5-20251001': 50,
'gemini-2.5-flash': 25,
'gpt-5-nano': 50,
'grok-4-fast-non-reasoning': 50,
}
/**
 * Default concurrency (number of evaluations run in parallel), kept bounded to prevent bursting
 *
 * @remarks
 * NOTE(review): where this default is applied is not visible in this module — confirm at the call sites.
 */
export const DEFAULT_CONCURRENCY = 10
/**
 * Display names for data format types, keyed by formatter identifier
 *
 * @remarks
 * The explicit `Record<string, string>` annotation is the type callers see, so
 * the map may be indexed with any string; a key that is not listed evaluates
 * to `undefined` at runtime. A `const` assertion on the literal would be
 * discarded by that annotation, so none is applied.
 */
export const FORMATTER_DISPLAY_NAMES: Record<string, string> = {
  'json-pretty': 'JSON',
  'json-compact': 'JSON compact',
  'toon': 'TOON',
  'csv': 'CSV',
  'xml': 'XML',
  'yaml': 'YAML',
}
/**
* Enable dry run mode for quick testing with limited AI requests
*
@@ -51,12 +25,80 @@ export const DRY_RUN_LIMITS = {
maxQuestions: 10,
}
/**
 * Per-model RPM (requests per minute) limits, keyed by model identifier, to handle API quotas
 *
 * @remarks
 * Map a model to `undefined` to declare that it has no specific limit.
 * Entries are listed in ascending key order; the `keep-sorted` directive
 * below is presumably what enforces that ordering (confirm against the lint setup).
 */
/// keep-sorted
export const MODEL_RPM_LIMITS: Record<string, number | undefined> = {
'claude-haiku-4-5-20251001': 50,
'gemini-2.5-flash': 25,
'gpt-5-nano': 50,
'grok-4-fast-non-reasoning': 50,
}
/**
 * Display names for data format types, keyed by formatter identifier
 *
 * @remarks
 * The explicit `Record<string, string>` annotation is the type callers see, so
 * the map may be indexed with any string; a key that is not listed evaluates
 * to `undefined` at runtime. A `const` assertion on the literal would be
 * discarded by that annotation, so none is applied.
 */
export const FORMATTER_DISPLAY_NAMES: Record<string, string> = {
  'json-pretty': 'JSON',
  'json-compact': 'JSON compact',
  'toon': 'TOON',
  'csv': 'CSV',
  'xml': 'XML',
  'yaml': 'YAML',
}
/**
 * Question type identifiers
 *
 * @remarks
 * Declared `as const`, so the array is a readonly tuple of string literals;
 * a union of the identifiers can be derived with `(typeof QUESTION_TYPES)[number]`.
 * The same four identifiers are the keys of `QUESTION_TYPE_LABELS`.
 */
export const QUESTION_TYPES = [
'field-retrieval',
'aggregation',
'filtering',
'structure-awareness',
] as const
/**
 * Display names for question types, keyed by question type identifier
 *
 * @remarks
 * The keys mirror the entries of `QUESTION_TYPES`. Nothing in this declaration
 * ties the two together, so a new question type has to be added in both places.
 */
export const QUESTION_TYPE_LABELS = {
'field-retrieval': 'Field Retrieval',
'aggregation': 'Aggregation',
'filtering': 'Filtering',
'structure-awareness': 'Structure Awareness',
} as const
/**
 * Dataset identifiers
 *
 * @remarks
 * Declared `as const`, so the array is a readonly tuple of string literals.
 * Identifiers here are kebab-case (`event-logs`, `nested-config`), whereas
 * `QUESTION_LIMITS` keys the corresponding sections in camelCase
 * (`eventLogs`, `nestedConfig`) — the two spellings are not interchangeable.
 */
export const DATASET_NAMES = [
'tabular',
'nested',
'analytics',
'github',
'event-logs',
'nested-config',
] as const
/**
 * Structure class identifiers
 *
 * @remarks
 * Declared `as const`, so the array is a readonly tuple of string literals.
 * Note that `'nested'` is also a value in `DATASET_NAMES`; the two lists are
 * separate identifier sets that happen to share that string.
 * NOTE(review): presumably these classify datasets by the shape of their data — confirm against usage.
 */
export const STRUCTURE_CLASSES = [
'uniform',
'semi-uniform',
'nested',
'deep',
] as const
/**
* Threshold values for filtering and aggregation questions
*/
export const QUESTION_THRESHOLDS = {
tabular: {
salaryRanges: [60000, 80000, 100000, 120000],
salaryRanges: [60000, 80000, 100000],
experienceYears: [5, 10, 15, 20],
departmentSalaryThreshold: 80000,
departmentExperienceThreshold: 10,
@@ -68,11 +110,11 @@ export const QUESTION_THRESHOLDS = {
totalThresholdsForItems: [300, 500],
},
analytics: {
views: [5000, 7000],
conversions: [10, 30],
views: [6000],
conversions: [20],
viewsForFiltering: [6000, 7000],
conversionsForFiltering: 15,
revenueThresholds: [500, 1000, 1500, 2000, 2500],
revenueThresholds: [1000, 1500, 2000],
viewsThresholdForRevenue: 6000,
clicksForFiltering: [250, 400],
conversionsForClickFiltering: 15,
@@ -81,8 +123,8 @@ export const QUESTION_THRESHOLDS = {
},
github: {
stars: [100000, 150000, 200000],
forks: [20000, 35000, 50000],
watchers: [5000, 8000],
forks: [20000, 35000],
watchers: [8000],
starForkCombinations: [
{ stars: 75000, forks: 15000 },
{ stars: 100000, forks: 20000 },
@@ -101,18 +143,18 @@ export const QUESTION_THRESHOLDS = {
*/
export const QUESTION_LIMITS = {
tabular: {
fieldRetrieval: 14,
aggregationDepartments: 4,
fieldRetrieval: 12,
aggregationDepartments: 3,
filteringMultiConditionDepartments: 5,
filteringExperience: 3,
filteringDepartmentExp: 3,
filteringDepartmentActive: 3,
filteringDepartmentActive: 2,
},
nested: {
fieldRetrievalOrders: 8,
fieldRetrievalCustomers: 10,
aggregationStatuses: 5,
filteringStatusAndValue: 5,
fieldRetrievalCustomers: 8,
aggregationStatuses: 3,
filteringStatusAndValue: 4,
filteringStatusAndItems: 3,
},
analytics: {
@@ -121,16 +163,17 @@ export const QUESTION_LIMITS = {
github: {
fieldRetrievalRepos: 11,
aggregationBranches: 2,
filteringStarsAndForks: 8,
filteringStarsAndForks: 3,
},
eventLogs: {
fieldRetrieval: 10,
aggregationEndpoints: 4,
aggregationEndpoints: 2,
filteringLevelAndStatus: 3,
filteringEndpointAndStatus: 3,
filteringEndpointRetryable: 2,
},
nestedConfig: {
fieldRetrieval: 10,
filteringComplex: 6,
filteringComplex: 5,
},
} as const