mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 15:24:10 +08:00
chore(benchmarks): add structure-awareness questions
This commit is contained in:
324
benchmarks/src/questions/structure.ts
Normal file
324
benchmarks/src/questions/structure.ts
Normal file
@@ -0,0 +1,324 @@
|
||||
import type { AnalyticsMetric, Employee, EventLog, Order, Repository } from '../datasets'
|
||||
import type { Question } from '../types'
|
||||
import { QuestionBuilder } from './utils'
|
||||
|
||||
/**
 * Generate structure-awareness questions across all datasets
 *
 * These questions test format-native structural affordances:
 * - TOON's explicit array length [N] and field declarations {fields}
 * - CSV's header row (but no explicit length)
 * - JSON/YAML have neither unless the model counts manually
 *
 * Questions are emitted in a fixed order (tabular, nested, analytics, github,
 * event-logs) and `getId` is called exactly once per emitted question, in that
 * same order.
 *
 * Questions that need at least one record (the "last row" lookups and the
 * item count of the largest order) are skipped when the corresponding dataset
 * is empty, instead of throwing.
 *
 * @param employees - Records backing the `tabular` questions
 * @param orders - Records backing the `nested` questions
 * @param metrics - Records backing the `analytics` questions
 * @param repos - Records backing the `github` questions
 * @param logs - Records backing the `event-logs` questions
 * @param getId - Supplies the id for each generated question
 * @returns The generated questions, in emission order
 */
export function generateStructureQuestions(
  employees: Employee[],
  orders: Order[],
  metrics: AnalyticsMetric[],
  repos: Repository[],
  logs: EventLog[],
  getId: () => string,
): Question[] {
  // Derive the helper's parameter types from the builder itself so the helper
  // accepts exactly what the direct `.dataset()` / `.groundTruth()` calls accept.
  type DatasetName = Parameters<QuestionBuilder['dataset']>[0]
  type GroundTruth = Parameters<QuestionBuilder['groundTruth']>[0]

  const questions: Question[] = []

  // Build one structure-awareness question and append it to the result.
  const ask = (dataset: DatasetName, prompt: string, groundTruth: GroundTruth): void => {
    questions.push(
      new QuestionBuilder()
        .id(getId())
        .prompt(prompt)
        .groundTruth(groundTruth)
        .type('structure-awareness')
        .dataset(dataset)
        .build(),
    )
  }

  // NOTE(review): the field lists below are hard-coded and presumably mirror the
  // declaration order of the record types in '../datasets' — confirm when those change.

  // ========== TABULAR DATASET (Employees) ==========

  // Single source of truth for the field list, the positional answer and the field count
  const employeeFields = ['id', 'name', 'email', 'department', 'salary', 'yearsExperience', 'active'] as const

  // Count: Total employees (tests array length awareness)
  ask('tabular', 'How many employees are in the dataset?', String(employees.length))

  // Field list: Employee fields (tests field name awareness)
  ask('tabular', 'List the field names for employees (comma-separated, in order).', employeeFields.join(','))

  // Positional: Third field name for employees (tests TOON {fields} syntax)
  ask('tabular', 'What is the 3rd field name for employees?', employeeFields[2])

  // Last row: Last employee's department and name (tests ability to find last row using length)
  const lastEmployee = employees.at(-1)
  if (lastEmployee !== undefined) {
    ask('tabular', 'What is the department of the last employee in the dataset?', lastEmployee.department)
    ask('tabular', 'What is the name of the last employee in the dataset?', lastEmployee.name)
  }

  // Field count: How many fields per employee (tests schema awareness)
  ask('tabular', 'How many fields does each employee record have?', String(employeeFields.length))

  // ========== NESTED DATASET (Orders) ==========

  // Count: Total orders
  ask('nested', 'How many orders are in the dataset?', String(orders.length))

  // Field list: Order fields
  const orderFields = 'orderId,customer,items,subtotal,tax,total,status,orderDate'
  ask('nested', 'List the top-level field names for orders (comma-separated, in order).', orderFields)

  // Nested count: Items in the order with the most items (the earliest one wins ties).
  // Guarded because reduce() without an initial value throws on an empty array.
  if (orders.length > 0) {
    const orderWithManyItems = orders.reduce((max, order) =>
      order.items.length > max.items.length ? order : max,
    )
    ask(
      'nested',
      `How many items are in order ${orderWithManyItems.orderId}?`,
      String(orderWithManyItems.items.length),
    )
  }

  // Nested field list: Item fields
  const itemFields = 'sku,name,quantity,price'
  ask('nested', 'What are the field names for items within orders (comma-separated, in order)?', itemFields)

  // Last row: Last order's status
  const lastOrder = orders.at(-1)
  if (lastOrder !== undefined) {
    ask('nested', 'What is the status of the last order in the dataset?', lastOrder.status)
  }

  // Customer field list
  const customerFields = 'id,name,email,phone'
  ask(
    'nested',
    'What are the field names for customer objects within orders (comma-separated, in order)?',
    customerFields,
  )

  // ========== ANALYTICS DATASET (Metrics) ==========

  const metricFields = ['date', 'views', 'clicks', 'conversions', 'revenue', 'bounceRate'] as const

  // Count: Total metrics
  ask('analytics', 'How many metric records are in the dataset?', String(metrics.length))

  // Field list: Metric fields
  ask('analytics', 'List the field names for metrics (comma-separated, in order).', metricFields.join(','))

  // Positional: Fifth field name for metrics (tests TOON {fields} syntax)
  ask('analytics', 'What is the 5th field name for analytics metrics?', metricFields[4])

  // Last row: Last metric's date
  const lastMetric = metrics.at(-1)
  if (lastMetric !== undefined) {
    ask('analytics', 'What is the date of the last metric record in the dataset?', lastMetric.date)
  }

  // Field count: How many fields per metric
  ask('analytics', 'How many fields does each metric record have?', String(metricFields.length))

  // ========== GITHUB DATASET (Repositories) ==========

  const repoFields = [
    'id',
    'name',
    'repo',
    'description',
    'stars',
    'watchers',
    'forks',
    'defaultBranch',
    'createdAt',
    'updatedAt',
    'pushedAt',
  ] as const

  // Count: Total repositories
  ask('github', 'How many repositories are in the dataset?', String(repos.length))

  // Field list: Repository fields
  ask('github', 'List the field names for repositories (comma-separated, in order).', repoFields.join(','))

  // Positional: Seventh field name for repos (tests TOON {fields} syntax)
  ask('github', 'What is the 7th field name for GitHub repositories?', repoFields[6])

  // Last row: Last repo's name
  const lastRepo = repos.at(-1)
  if (lastRepo !== undefined) {
    ask('github', 'What is the name of the last repository in the dataset?', lastRepo.name)
  }

  // Field count: How many fields per repository
  ask('github', 'How many fields does each repository record have?', String(repoFields.length))

  // ========== EVENT LOGS DATASET ==========

  // Count: Total logs
  ask('event-logs', 'How many log entries are in the dataset?', String(logs.length))

  // Field list: Base log fields (including optional error)
  const logFields = 'timestamp,level,endpoint,statusCode,responseTime,userId,error'
  ask(
    'event-logs',
    'List the field names for log entries (comma-separated, any order, including optional fields).',
    logFields,
  )

  // Last row: Last log's level
  const lastLog = logs.at(-1)
  if (lastLog !== undefined) {
    ask('event-logs', 'What is the level of the last log entry in the dataset?', lastLog.level)
  }

  return questions
}
|
||||
Reference in New Issue
Block a user