test: add LLM retrieval accuracy tests

2026-01-29 23:34:10 +08:00 · 2025-10-27 11:48:33 +01:00
parent eb8f7e28e1
commit 3c840259fe
25 changed files with 21404 additions and 723 deletions
--- a/benchmarks/src/types.ts
+++ b/benchmarks/src/types.ts
@@ -0,0 +1,35 @@
+export interface Dataset { // A named dataset: a name, a description, and the payload itself.
+  name: string // Dataset name. NOTE(review): presumably the value Question.dataset refers to — confirm.
+  description: string // Free-text description of the dataset.
+  data: any // Payload; typed `any`, so its shape is not checked by the compiler at this boundary.
+}
+
+export interface Question { // One benchmark question together with the answer it is checked against.
+  id: string // Question identifier. NOTE(review): presumably unique and matched by EvaluationResult.questionId — confirm.
+  prompt: string // Text of the question.
+  groundTruth: string // Reference answer, stored as a string.
+  type: 'field-retrieval' | 'aggregation' | 'filtering' | 'comparison' // Question category; restricted to these four literals.
+  dataset: string // NOTE(review): presumably a Dataset.name value; the type does not enforce this — confirm.
+}
+
+export interface EvaluationResult { // Record of one evaluated answer, keyed by question, format and model.
+  questionId: string // NOTE(review): presumably the Question.id this result belongs to — confirm.
+  format: string // Name of the format evaluated; the set of allowed values is not constrained here.
+  model: string // Name of the model evaluated; the set of allowed values is not constrained here.
+  expected: string // Expected answer. NOTE(review): presumably copied from Question.groundTruth — confirm.
+  actual: string // Answer actually obtained, as a string.
+  correct: boolean // Whether the answer was judged correct; the comparison rule is defined by the producer, not here.
+  inputTokens: number // Input token count for this evaluation.
+  outputTokens: number // Output token count for this evaluation.
+  latencyMs: number // Latency of this evaluation; milliseconds per the field name.
+}
+
+export interface FormatResult { // Aggregate figures for a single format.
+  format: string // Name of the format these figures describe.
+  accuracy: number // NOTE(review): scale (0–1 fraction vs. percentage) is not established here; presumably correctCount / totalCount — confirm.
+  totalTokens: number // Total token count. NOTE(review): whether this includes output tokens is not shown here — confirm.
+  avgInputTokens: number // Average input tokens. NOTE(review): presumably averaged per evaluation — confirm.
+  avgLatency: number // Average latency. NOTE(review): unit not stated; presumably milliseconds like EvaluationResult.latencyMs — confirm.
+  correctCount: number // Number of results counted as correct.
+  totalCount: number // Number of results counted in total.
+}