test: add LLM retrieval accuracy tests

This commit is contained in:
Johann Schopplich
2025-10-27 11:48:33 +01:00
parent eb8f7e28e1
commit 3c840259fe
25 changed files with 21404 additions and 723 deletions

35
benchmarks/src/types.ts Normal file
View File

@@ -0,0 +1,35 @@
/**
 * A named dataset used by the benchmark suite.
 */
export interface Dataset {
  /** Identifier of the dataset. */
  name: string

  /** Free-text description of the dataset. */
  description: string

  /**
   * The dataset payload; its shape is not constrained by this type.
   *
   * NOTE(review): `any` turns off type checking for every consumer of this
   * field. Consider `unknown` (or a type parameter) once all call sites are
   * confirmed to narrow the value before use.
   */
  data: any
}
/**
 * A single benchmark question together with its expected answer.
 */
export interface Question {
  /** Identifier of the question. */
  id: string

  /** The question text (presumably what is sent to the model — confirm against the runner). */
  prompt: string

  /** The expected answer, stored as a string. */
  groundTruth: string

  /** Category of the question; restricted to the four literals below. */
  type:
    | 'field-retrieval'
    | 'aggregation'
    | 'filtering'
    | 'comparison'

  /** Dataset this question belongs to (presumably a `Dataset.name` value — confirm against callers). */
  dataset: string
}
/**
 * The recorded outcome of one question for one format/model combination.
 */
export interface EvaluationResult {
  /** Identifier of the question (presumably a `Question.id` value — confirm against callers). */
  questionId: string

  /** Name of the format under evaluation. */
  format: string

  /** Name of the model that produced the answer. */
  model: string

  /** The expected answer. */
  expected: string

  /** The answer actually produced. */
  actual: string

  /** Whether the answer was judged correct; the comparison rule is defined by the producer of this record. */
  correct: boolean

  /** Number of input tokens recorded for this evaluation. */
  inputTokens: number

  /** Number of output tokens recorded for this evaluation. */
  outputTokens: number

  /** Recorded latency (presumably in milliseconds, per the `Ms` suffix — confirm). */
  latencyMs: number
}
/**
 * Summary metrics for a single format.
 */
export interface FormatResult {
  /** Name of the format these metrics describe. */
  format: string

  /**
   * Accuracy score for the format.
   *
   * NOTE(review): presumably `correctCount / totalCount`; the scale
   * (fraction vs. percentage) is not visible here — confirm at the producer.
   */
  accuracy: number

  /** Total token count for the format. */
  totalTokens: number

  /** Average input-token count (presumably per evaluated question — confirm). */
  avgInputTokens: number

  /** Average latency (unit not stated here; presumably matches `EvaluationResult.latencyMs` — confirm). */
  avgLatency: number

  /** Number of answers counted as correct. */
  correctCount: number

  /** Total number of answers counted. */
  totalCount: number
}