mirror of
https://github.com/voson-wang/toon.git
synced 2026-01-29 23:34:10 +08:00
chore(benchmarks): finalize structure-awareness run
This commit is contained in:
@@ -10,9 +10,9 @@ import { generateText } from 'ai'
|
||||
* Models used for evaluation
|
||||
*/
|
||||
export const models: LanguageModelV2[] = [
|
||||
openai('gpt-5-nano'),
|
||||
anthropic('claude-haiku-4-5-20251001'),
|
||||
google('gemini-2.5-flash'),
|
||||
openai('gpt-5-nano'),
|
||||
xai('grok-4-fast-non-reasoning'),
|
||||
]
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ export function generateAccuracyReport(
|
||||
Benchmarks test LLM comprehension across different input formats using ${totalQuestions} data retrieval questions on ${modelNames.length} ${modelNames.length === 1 ? 'model' : 'models'}.
|
||||
|
||||
<details>
|
||||
<summary><strong>View Dataset Catalog</strong></summary>
|
||||
<summary><strong>Show Dataset Catalog</strong></summary>
|
||||
|
||||
${generateDatasetCatalog(ACCURACY_DATASETS)}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user