mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
47 lines
2.4 KiB
TypeScript
47 lines
2.4 KiB
TypeScript
import type { AnalyticsMetric, Employee, EventLog, NestedConfig, Order, Repository } from '../datasets'
|
|
import type { Question } from '../types'
|
|
import { ACCURACY_DATASETS } from '../datasets'
|
|
import { generateAnalyticsQuestions } from './analytics'
|
|
import { generateEventLogsQuestions } from './event-logs'
|
|
import { generateGithubQuestions } from './github'
|
|
import { generateNestedQuestions } from './nested'
|
|
import { generateNestedConfigQuestions } from './nested-config'
|
|
import { generateTabularQuestions } from './tabular'
|
|
import { createIdGenerator } from './utils'
|
|
|
|
/**
 * Build the full question list by running every dataset-specific generator.
 *
 * @remarks
 * Produces roughly 150-160 questions covering these question types:
 * - Field Retrieval: direct field access with no computation
 *   Examples: "What is X's salary?", "What is the status of order Y?"
 * - Aggregation: counts, sums, averages, min/max operations (including single-condition filters)
 *   Examples: "How many X?", "What is the total/average?", "How many X > threshold?"
 * - Filtering: multi-condition queries requiring complex logical operations
 *   Examples: "How many X WHERE condition1 AND condition2?"
 *
 * Datasets are looked up by name in `ACCURACY_DATASETS`. When an array-valued
 * dataset (or its expected property) is absent, an empty array is handed to the
 * generator; an absent `nested-config` dataset is passed through as `undefined`.
 *
 * @returns Questions in generator order: tabular, nested, analytics, github,
 * event-logs, nested-config.
 */
export function generateQuestions(): Question[] {
  // A single ID source is shared by every generator below
  const idSequence = createIdGenerator()
  const getId = () => idSequence.next().value

  // Resolve a dataset's raw payload by name (undefined when no dataset matches)
  const payloadOf = (name: string) => ACCURACY_DATASETS.find(entry => entry.name === name)?.data

  // Pull out each dataset with its concrete element type
  const employees = (payloadOf('tabular')?.employees as Employee[]) ?? []
  const orders = (payloadOf('nested')?.orders as Order[]) ?? []
  const metrics = (payloadOf('analytics')?.metrics as AnalyticsMetric[]) ?? []
  const repositories = (payloadOf('github')?.repositories as Repository[]) ?? []
  const logs = (payloadOf('event-logs')?.logs as EventLog[]) ?? []
  const config = payloadOf('nested-config') as NestedConfig | undefined

  // Spread elements are evaluated left to right, so the generators consume
  // IDs in the same order as listed here
  return [
    ...generateTabularQuestions(employees, getId),
    ...generateNestedQuestions(orders, getId),
    ...generateAnalyticsQuestions(metrics, getId),
    ...generateGithubQuestions(repositories, getId),
    ...generateEventLogsQuestions(logs, getId),
    ...generateNestedConfigQuestions(config, getId),
  ]
}
|