test(benchmark): overhaul generation

This commit is contained in:
Johann Schopplich
2025-11-06 14:45:44 +01:00
parent 9863875706
commit bc711ccecf
19 changed files with 2254 additions and 997 deletions

View File

@@ -0,0 +1,196 @@
import type { AnalyticsMetric } from '../datasets'
import type { Question } from '../types'
import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants'
import { countByPredicate, QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils'
/**
 * Assemble a single question for the `analytics` dataset.
 *
 * Centralizes the builder chain shared by every analytics question so call
 * sites only state what differs: the prompt, the expected answer, and the
 * question type.
 *
 * @param getId - Called exactly once to obtain the id of the new question.
 * @param prompt - Question text.
 * @param groundTruth - Expected answer, already converted to a string.
 * @param type - Question category.
 * @returns The built question.
 */
function buildAnalyticsQuestion(
  getId: () => string,
  prompt: string,
  groundTruth: string,
  type: 'field-retrieval' | 'aggregation' | 'filtering',
): Question {
  return new QuestionBuilder()
    .id(getId())
    .prompt(prompt)
    .groundTruth(groundTruth)
    .type(type)
    .dataset('analytics')
    .build()
}

/**
 * Generate analytics (website metrics) questions.
 *
 * Questions are emitted in a fixed order, and `getId` is invoked once per
 * question in that same order:
 * 1. field retrieval (views, revenue, bounce rate, conversions for a date),
 *    selected through `rotateQuestions`
 * 2. aggregation over the whole dataset (day count, totals, average bounce rate)
 * 3. aggregation by single threshold (views, conversions)
 * 4. filtering by two combined conditions
 *
 * @param metrics - Metric records to derive questions from.
 * @param getId - Supplies the id for each generated question.
 * @returns The generated questions; an empty array when `metrics` is empty.
 */
export function generateAnalyticsQuestions(metrics: AnalyticsMetric[], getId: () => string): Question[] {
  const questions: Question[] = []

  // Nothing to ask about; this also keeps the average below from dividing by zero.
  if (metrics.length === 0)
    return questions

  const thresholds = QUESTION_THRESHOLDS.analytics

  // Field retrieval: date-based metrics.
  // Each generator uses the id supplier handed to it by `rotateQuestions`
  // (named `nextId` to avoid shadowing the outer `getId`).
  const metricFieldGenerators: Array<(metric: AnalyticsMetric, nextId: () => string) => Question> = [
    (metric, nextId) => buildAnalyticsQuestion(
      nextId,
      `What are the views for ${metric.date}?`,
      String(metric.views),
      'field-retrieval',
    ),
    (metric, nextId) => buildAnalyticsQuestion(
      nextId,
      `What is the revenue for ${metric.date}?`,
      String(metric.revenue),
      'field-retrieval',
    ),
    (metric, nextId) => buildAnalyticsQuestion(
      nextId,
      `What is the bounce rate for ${metric.date}?`,
      String(metric.bounceRate),
      'field-retrieval',
    ),
    (metric, nextId) => buildAnalyticsQuestion(
      nextId,
      `How many conversions were there on ${metric.date}?`,
      String(metric.conversions),
      'field-retrieval',
    ),
  ]

  questions.push(...rotateQuestions(
    metrics,
    metricFieldGenerators,
    QUESTION_LIMITS.analytics.fieldRetrievalDates,
    SAMPLE_STRIDES.ANALYTICS_FIELD,
    getId,
  ))

  // Aggregation: basic statistics
  const totalDays = metrics.length
  const totalViews = metrics.reduce((sum, m) => sum + m.views, 0)
  const totalConversions = metrics.reduce((sum, m) => sum + m.conversions, 0)
  const totalRevenue = metrics.reduce((sum, m) => sum + m.revenue, 0)
  const avgBounceRate = metrics.reduce((sum, m) => sum + m.bounceRate, 0) / totalDays

  questions.push(
    buildAnalyticsQuestion(getId, 'How many days of data are in the dataset?', String(totalDays), 'aggregation'),
    buildAnalyticsQuestion(getId, 'What is the total number of views across all dates?', String(totalViews), 'aggregation'),
    buildAnalyticsQuestion(getId, 'What is the total number of conversions across all dates?', String(totalConversions), 'aggregation'),
    // `toFixed` already returns a string, so no extra String() conversion is needed.
    buildAnalyticsQuestion(getId, 'What is the total revenue across all dates?', totalRevenue.toFixed(2), 'aggregation'),
    buildAnalyticsQuestion(getId, 'What is the average bounce rate?', avgBounceRate.toFixed(2), 'aggregation'),
  )

  // Aggregation: high views
  for (const threshold of thresholds.views) {
    const count = countByPredicate(metrics, m => m.views > threshold)
    questions.push(buildAnalyticsQuestion(
      getId,
      `How many days had more than ${threshold} views?`,
      String(count),
      'aggregation',
    ))
  }

  // Aggregation: high conversions
  for (const threshold of thresholds.conversions) {
    const count = countByPredicate(metrics, m => m.conversions > threshold)
    questions.push(buildAnalyticsQuestion(
      getId,
      `How many days had more than ${threshold} conversions?`,
      String(count),
      'aggregation',
    ))
  }

  // Filtering: multi-condition (views AND conversions)
  for (const threshold of thresholds.viewsForFiltering) {
    const count = countByPredicate(
      metrics,
      m => m.views > threshold && m.conversions > thresholds.conversionsForFiltering,
    )
    questions.push(buildAnalyticsQuestion(
      getId,
      `How many days had more than ${threshold} views and more than ${thresholds.conversionsForFiltering} conversions?`,
      String(count),
      'filtering',
    ))
  }

  // Filtering: revenue AND views
  for (const threshold of thresholds.revenueThresholds) {
    const count = countByPredicate(
      metrics,
      m => m.revenue > threshold && m.views > thresholds.viewsThresholdForRevenue,
    )
    questions.push(buildAnalyticsQuestion(
      getId,
      `How many days had revenue greater than ${threshold} with views above ${thresholds.viewsThresholdForRevenue}?`,
      String(count),
      'filtering',
    ))
  }

  // Filtering: clicks AND conversions
  for (const threshold of thresholds.clicksForFiltering) {
    const count = countByPredicate(
      metrics,
      m => m.clicks > threshold && m.conversions > thresholds.conversionsForClickFiltering,
    )
    questions.push(buildAnalyticsQuestion(
      getId,
      `How many days had more than ${threshold} clicks and more than ${thresholds.conversionsForClickFiltering} conversions?`,
      String(count),
      'filtering',
    ))
  }

  // Filtering: revenue above a threshold AND bounce rate below a threshold
  for (const threshold of thresholds.revenueForBounceRate) {
    const count = countByPredicate(
      metrics,
      m => m.revenue > threshold && m.bounceRate < thresholds.bounceRateThreshold,
    )
    questions.push(buildAnalyticsQuestion(
      getId,
      `How many days had revenue greater than ${threshold} with bounce rate below ${thresholds.bounceRateThreshold}?`,
      String(count),
      'filtering',
    ))
  }

  return questions
}