{
  "schemaVersion": "1.0",
  "name": "BenchLM models",
  "description": "Stable model metadata, rankings, benchmark scores, and coverage fields for BenchLM model pages.",
  "canonicalUrl": "https://benchlm.ai/data/models.json",
  "generatedAt": "2026-06-12T20:35:11.146Z",
  "sourceLastUpdated": "June 12, 2026",
  "sourceFiles": [
    "src/data/benchmarks.json",
    "src/data/provenance.js",
    "src/data/modelReleaseMetadata.js"
  ],
  "counts": {
    "totalModels": 258,
    "canonicalModels": 175,
    "rankingEligibleModels": 123
  },
  "items": [
    {
      "slug": "claude-mythos-5",
      "canonicalModelKey": "claude-mythos-5",
      "model": "Claude Mythos 5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M+",
      "contextWindowTokens": 1000000,
      "displayScore": 99,
      "rankingEligible": true,
      "overallRank": 1,
      "url": "https://benchlm.ai/models/claude-mythos-5",
      "markdownUrl": "https://benchlm.ai/md/models/claude-mythos-5.md",
      "id": 9,
      "releaseDate": "2026-06-09",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-mythos",
        "familyName": "Claude Mythos",
        "variantType": "restricted",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-mythos",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 99,
        "overallScore": 99,
        "rawOverallScore": 99,
        "verifiedDisplayScore": 85,
        "displayCategoryScores": {
          "agentic": 100,
          "coding": 100,
          "reasoning": null,
          "multimodalGrounded": 98.9,
          "knowledge": 99.5,
          "multilingual": 100,
          "instructionFollowing": 91.4,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 87,
          "coding": 85.9,
          "reasoning": null,
          "multimodalGrounded": 92.7,
          "knowledge": 74.6,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 1,
        "categoryRanks": {
          "agentic": 1,
          "coding": 1,
          "multimodalGrounded": 2,
          "multilingual": 1,
          "instructionFollowing": 17
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 17,
        "verifiedBenchmarkCount": 17,
        "rankableBenchmarkCount": 25,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 88,
          "osWorldVerified": 85,
          "browseComp": 88,
          "gdpvalAa": 1932
        },
        "coding": {
          "sweVerified": 95.5,
          "swePro": 80.3,
          "frontierCode": 29.3,
          "terminalBench2": 88
        },
        "reasoning": {},
        "multimodalGrounded": {
          "mmmuPro": 92.7,
          "sweMultimodal": 54.9,
          "charxiv": 93.5,
          "charxivNoTools": 88.9,
          "blueprintBench2": 38.6
        },
        "knowledge": {
          "gpqa": 94.1,
          "hle": 64.5,
          "hleNoTools": 59
        },
        "multilingual": {
          "sweMultilingual": 92.2
        },
        "instructionFollowing": {},
        "math": {
          "usamo2026": 97.6
        },
        "external": {
          "exploitBench": 78
        }
      }
    },
    {
      "slug": "claude-fable",
      "canonicalModelKey": "claude-fable-5",
      "model": "Claude Fable 5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M+",
      "contextWindowTokens": 1000000,
      "displayScore": 97,
      "rankingEligible": true,
      "overallRank": 2,
      "url": "https://benchlm.ai/models/claude-fable",
      "markdownUrl": "https://benchlm.ai/md/models/claude-fable.md",
      "id": 257,
      "releaseDate": "2026-06-09",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-fable",
        "familyName": "Claude Fable",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-fable-5",
        "relatedModelKeys": [
          "claude-mythos-5"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 97,
        "overallScore": 96,
        "rawOverallScore": 96,
        "verifiedDisplayScore": 85,
        "displayCategoryScores": {
          "agentic": 100,
          "coding": 100,
          "reasoning": null,
          "multimodalGrounded": 79,
          "knowledge": 99.8,
          "multilingual": 100,
          "instructionFollowing": 92.7,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 85.2,
          "coding": 85.6,
          "reasoning": null,
          "multimodalGrounded": 92.4,
          "knowledge": 74.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 2,
        "categoryRanks": {
          "agentic": 2,
          "coding": 2,
          "multimodalGrounded": 18,
          "multilingual": 2,
          "instructionFollowing": 12
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 17,
        "verifiedBenchmarkCount": 17,
        "rankableBenchmarkCount": 28,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 84.3,
          "osWorldVerified": 85,
          "browseComp": 86.9,
          "gdpvalAa": 1932
        },
        "coding": {
          "sweVerified": 95,
          "swePro": 80,
          "frontierCode": 29.3,
          "terminalBench2": 84.3
        },
        "reasoning": {},
        "multimodalGrounded": {
          "mmmuPro": 92.7,
          "sweMultimodal": 59,
          "charxiv": 93.2,
          "charxivNoTools": 86.1,
          "blueprintBench2": 38.6
        },
        "knowledge": {
          "gpqa": 94.5,
          "hle": 64.5,
          "hleNoTools": 59
        },
        "multilingual": {
          "sweMultilingual": 87.3
        },
        "instructionFollowing": {},
        "math": {
          "usamo2026": 97.6
        },
        "external": {
          "exploitBench": 78
        }
      }
    },
    {
      "slug": "claude-opus-4-8",
      "canonicalModelKey": "claude-opus-4-8",
      "model": "Claude Opus 4.8",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 93,
      "rankingEligible": true,
      "overallRank": 3,
      "url": "https://benchlm.ai/models/claude-opus-4-8",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-8.md",
      "id": 238,
      "releaseDate": "2026-05-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-opus-4-8",
        "familyName": "Claude Opus 4.8",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-opus-4-8",
        "relatedModelKeys": [
          "claude-opus-4-7-max",
          "claude-opus-4-7",
          "claude-opus-4-6"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "claude-opus-4-7-max"
      },
      "scores": {
        "displayScore": 93,
        "overallScore": 92,
        "rawOverallScore": 92,
        "verifiedDisplayScore": 76,
        "displayCategoryScores": {
          "agentic": 96.4,
          "coding": 98,
          "reasoning": null,
          "multimodalGrounded": 66.4,
          "knowledge": 98.7,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 80.1,
          "coding": 76.4,
          "reasoning": null,
          "multimodalGrounded": 76.1,
          "knowledge": 70.1,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 3,
        "categoryRanks": {
          "agentic": 3,
          "coding": 3,
          "multimodalGrounded": 36,
          "knowledge": 2
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 23,
        "verifiedBenchmarkCount": 23,
        "rankableBenchmarkCount": 24,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 74.6,
          "browseComp": 84.3,
          "deepSearchQa": 93.1,
          "osWorldVerified": 83.4,
          "financeAgentV2": 53.9,
          "gdpvalAa": 1890,
          "mcpAtlas": 82.2,
          "toolathlon": 59.9,
          "gertLabs": 72.97,
          "aaAgenticIndex": 77.81,
          "tau2Bench": 94.4,
          "gdpvalAaNormalized": 69.5
        },
        "coding": {
          "sweVerified": 88.6,
          "swePro": 69.2,
          "sweMultilingual": 84.4,
          "sweMultimodal": 38.4,
          "terminalBench2": 74.6,
          "cursorBench31": 58.4,
          "aaCodingIndex": 56.71,
          "terminalBenchHard": 58.3,
          "aaSciCode": 53.5
        },
        "reasoning": {
          "lcr": 67.7,
          "critpt": 20.9
        },
        "multimodalGrounded": {
          "officeQaPro": 66.2,
          "screenSpotPro": 87.9,
          "charxiv": 89.9,
          "charxivNoTools": 80.5,
          "designArenaWebsite": 1284
        },
        "knowledge": {
          "gpqa": 93.6,
          "gpqaDiamond": 93.6,
          "hle": 57.9,
          "hleNoTools": 49.8,
          "artificialAnalysis": 61.44,
          "aaGpqaDiamond": 92,
          "aaHle": 45.7,
          "aaOmniscienceIndex": 27.4,
          "omniscienceAccuracy": 46.6,
          "omniscienceHallucinationRate": 35.9
        },
        "multilingual": {
          "include": 87.6
        },
        "instructionFollowing": {
          "aaIfBench": 62.2
        },
        "math": {
          "usamo2026": 96.7
        }
      }
    },
    {
      "slug": "gemini-3-1-pro",
      "canonicalModelKey": "gemini-3-1-pro",
      "model": "Gemini 3.1 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 91,
      "rankingEligible": true,
      "overallRank": 4,
      "url": "https://benchlm.ai/models/gemini-3-1-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
      "id": 14,
      "releaseDate": "2026-02-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-3-1-pro",
        "familyName": "Gemini 3.1 Pro",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-3-1-pro",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 91,
        "overallScore": 89,
        "rawOverallScore": 89,
        "verifiedDisplayScore": 79,
        "displayCategoryScores": {
          "agentic": 81.6,
          "coding": 93,
          "reasoning": 96.4,
          "multimodalGrounded": 84,
          "knowledge": 93.9,
          "multilingual": 100,
          "instructionFollowing": 93.4,
          "math": 67.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 77.1,
          "multimodalGrounded": 82.8,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 4,
        "categoryRanks": {
          "agentic": 13,
          "coding": 5,
          "reasoning": 2,
          "multimodalGrounded": 14,
          "knowledge": 4,
          "multilingual": 3,
          "instructionFollowing": 10,
          "math": 37
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 17,
        "verifiedBenchmarkCount": 17,
        "rankableBenchmarkCount": 49,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 57.8,
          "deepSearchQa": 69.7,
          "tau2Bench": 95.6,
          "aaAgenticIndex": 59.09,
          "apexAgentsAa": 32,
          "gdpvalAaNormalized": 40.7,
          "gdpvalAa": 1314,
          "gertLabs": 56.87
        },
        "coding": {
          "liveCodeBenchPro": 82.9,
          "reactNativeEvals": 78.9,
          "vibeCodeBench": 32.034,
          "aaCodingIndex": 55.5,
          "terminalBenchHard": 53.8,
          "aaSciCode": 58.9
        },
        "reasoning": {
          "arcAgi2": 77.1,
          "lcr": 72.7,
          "critpt": 17.7
        },
        "multimodalGrounded": {
          "mmmuPro": 83.9,
          "charxiv": 80.2,
          "erqa": 69.4,
          "simpleVqa": 72.4,
          "screenSpotPro": 84.4,
          "zeroBench": 29,
          "medXpertQaMm": 81.3,
          "gdpvalAa": 1320,
          "aaMmmuPro": 82.4,
          "designArenaWebsite": 1296
        },
        "knowledge": {
          "gpqaDiamond": 94.3,
          "hleNoTools": 45.4,
          "healthBenchHard": 20.6,
          "medXpertQaText": 71.5,
          "artificialAnalysis": 57.18,
          "aaGpqaDiamond": 94.1,
          "aaHle": 44.7,
          "aaOmniscienceIndex": 32.9,
          "omniscienceAccuracy": 55.3,
          "omniscienceHallucinationRate": 49.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 77.1
        },
        "math": {}
      }
    },
    {
      "slug": "qwen3-7-max",
      "canonicalModelKey": "qwen3-7-max",
      "model": "Qwen3.7 Max",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 91,
      "rankingEligible": true,
      "overallRank": 5,
      "url": "https://benchlm.ai/models/qwen3-7-max",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-max.md",
      "id": 15,
      "releaseDate": "2026-05-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-7-max",
        "familyName": "Qwen3.7 Max",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-7-max",
        "relatedModelKeys": [
          "qwen3-6-plus",
          "qwen3-6-max-preview"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 91,
        "overallScore": 89,
        "rawOverallScore": 89,
        "verifiedDisplayScore": 78,
        "displayCategoryScores": {
          "agentic": 86.9,
          "coding": 91.6,
          "reasoning": 94.8,
          "multimodalGrounded": null,
          "knowledge": 85.9,
          "multilingual": 84,
          "instructionFollowing": 93.4,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 69.7,
          "coding": 73.6,
          "reasoning": 90.4,
          "multimodalGrounded": null,
          "knowledge": 71.2,
          "multilingual": 87,
          "instructionFollowing": 89,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 5,
        "categoryRanks": {
          "coding": 6,
          "knowledge": 10,
          "multilingual": 13,
          "instructionFollowing": 11
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 33,
        "verifiedBenchmarkCount": 33,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 69.7,
          "qwenClawBench": 64.3,
          "qwenWebBench": 1568,
          "clawEval": 65.2,
          "bfclV4": 75,
          "mcpAtlas": 76.4,
          "vitaBench": 47.9,
          "hleWithTools": 53.5,
          "aaAgenticIndex": 66.56,
          "tau2Bench": 94.7,
          "gdpvalAaNormalized": 52.2,
          "gdpvalAa": 1543,
          "gertLabs": 64.27
        },
        "coding": {
          "sweVerified": 80.4,
          "swePro": 60.6,
          "sweMultilingual": 78.3,
          "nl2Repo": 47.2,
          "sciCode": 53.5,
          "liveCodeBench": 91.6,
          "terminalBench2": 69.7,
          "aaCodingIndex": 50.12,
          "terminalBenchHard": 50.8,
          "aaSciCode": 48.8
        },
        "reasoning": {
          "mrcrv2": 90.4,
          "critpt": 13.4,
          "lcr": 69
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1307
        },
        "knowledge": {
          "gpqa": 92.4,
          "gpqaDiamond": 92.4,
          "hle": 41.4,
          "mmluPro": 89.6,
          "mmluRedux": 95,
          "superGpqa": 73.6,
          "mmmlu": 90.3,
          "artificialAnalysis": 56.58,
          "aaGpqaDiamond": 92.3,
          "aaHle": 38.1,
          "aaOmniscienceIndex": 14.1,
          "omniscienceAccuracy": 30.1,
          "omniscienceHallucinationRate": 22.9
        },
        "multilingual": {
          "mmluProX": 87,
          "nova63": 59,
          "include": 86.2,
          "maxife": 89.2,
          "polyMath": 86.5
        },
        "instructionFollowing": {
          "ifeval": 94.3,
          "ifBench": 79.1,
          "aaIfBench": 80.5
        },
        "math": {
          "hmmtFeb2026": 97.1,
          "imoAnswerBench": 90,
          "apex": 44.5
        }
      }
    },
    {
      "slug": "gpt-5-4-pro",
      "canonicalModelKey": "gpt-5-4-pro",
      "model": "GPT-5.4 Pro",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1.05M",
      "contextWindowTokens": 1050000,
      "displayScore": 90,
      "rankingEligible": true,
      "overallRank": 6,
      "url": "https://benchlm.ai/models/gpt-5-4-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4-pro.md",
      "id": 40,
      "releaseDate": "2026-03-05",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-4",
        "familyName": "GPT-5.4",
        "variantType": "pro",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-4",
        "relatedModelKeys": [
          "gpt-5-4",
          "gpt-5-4-mini",
          "gpt-5-4-nano"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 90,
        "overallScore": 89,
        "rawOverallScore": 89,
        "verifiedDisplayScore": 79,
        "displayCategoryScores": {
          "agentic": 89.6,
          "coding": 90.6,
          "reasoning": 97.5,
          "multimodalGrounded": 100,
          "knowledge": 59.9,
          "multilingual": null,
          "instructionFollowing": 93.7,
          "math": 92.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 89.3,
          "coding": null,
          "reasoning": 83.3,
          "multimodalGrounded": 94,
          "knowledge": 49,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 50
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 6,
        "categoryRanks": {
          "agentic": 7,
          "instructionFollowing": 9
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 9,
        "verifiedBenchmarkCount": 9,
        "rankableBenchmarkCount": 14,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "browseComp": 89.3
        },
        "coding": {},
        "reasoning": {
          "arcAgi2": 83.3,
          "critpt": 30
        },
        "multimodalGrounded": {
          "mmmuPro": 94
        },
        "knowledge": {
          "hle": 58.7,
          "frontierScience": 36.7,
          "frontierScienceResearch": 36.7,
          "hleNoTools": 42.7
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {
          "ipho2025Theory": 93.5,
          "frontierMath": 50
        }
      }
    },
    {
      "slug": "gpt-5-5",
      "canonicalModelKey": "gpt-5-5",
      "model": "GPT-5.5",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 89,
      "rankingEligible": true,
      "overallRank": 7,
      "url": "https://benchlm.ai/models/gpt-5-5",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-5.md",
      "id": 32,
      "releaseDate": "2026-04-23",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-5",
        "familyName": "GPT-5.5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-5",
        "relatedModelKeys": [
          "gpt-5-5-pro"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "gpt-5-4"
      },
      "scores": {
        "displayScore": 89,
        "overallScore": 88,
        "rawOverallScore": 88,
        "verifiedDisplayScore": 72,
        "displayCategoryScores": {
          "agentic": 95.9,
          "coding": 82.6,
          "reasoning": 96.3,
          "multimodalGrounded": 58,
          "knowledge": 95.6,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 96.4
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 81.5,
          "coding": 58.6,
          "reasoning": 85,
          "multimodalGrounded": 70.4,
          "knowledge": 66.4,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 51.7
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 7,
        "categoryRanks": {
          "agentic": 4,
          "multimodalGrounded": 59
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 22,
        "verifiedBenchmarkCount": 22,
        "rankableBenchmarkCount": 23,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 82,
          "cyberGym": 81.8,
          "browseComp": 84.4,
          "osWorldVerified": 78.7,
          "mcpAtlas": 75.3,
          "toolathlon": 55.6,
          "tau2Bench": 93.9,
          "aaAgenticIndex": 74.12,
          "apexAgentsAa": 37.7,
          "gdpvalAaNormalized": 63.5,
          "gdpvalAa": 1769,
          "gertLabs": 72.93
        },
        "coding": {
          "swePro": 58.6,
          "terminalBench2": 82,
          "vibeCodeBench": 69.847,
          "reactNativeEvals": 84.7,
          "cursorBench31": 59.2,
          "aaCodingIndex": 59.12,
          "terminalBenchHard": 60.6,
          "aaSciCode": 56.1
        },
        "reasoning": {
          "mrcrv2_64_128": 83.1,
          "mrcrv2_128_256": 87.5,
          "arcAgi2": 85,
          "lcr": 74.3,
          "critpt": 27.1
        },
        "multimodalGrounded": {
          "mmmuPro": 81.2,
          "mmmuProPython": 83.2,
          "officeQaPro": 54.1,
          "aaMmmuPro": 79.9,
          "designArenaWebsite": 1297
        },
        "knowledge": {
          "gpqa": 93.6,
          "gpqaDiamond": 93.6,
          "hle": 52.2,
          "hleNoTools": 41.4,
          "artificialAnalysis": 60.24,
          "aaGpqaDiamond": 93.5,
          "aaHle": 44.3,
          "aaOmniscienceIndex": 20.1,
          "omniscienceAccuracy": 56.9,
          "omniscienceHallucinationRate": 85.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 75.9
        },
        "math": {
          "frontierMath": 51.7
        }
      }
    },
    {
      "slug": "gemini-3-pro-deep-think",
      "canonicalModelKey": "gemini-3-pro-deep-think",
      "model": "Gemini 3 Pro Deep Think",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 89,
      "rankingEligible": true,
      "overallRank": 8,
      "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
      "id": 12,
      "releaseDate": "2026-02-12",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-3-pro",
        "familyName": "Gemini 3 Pro",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-3-pro",
        "relatedModelKeys": [
          "gemini-3-pro"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 89,
        "overallScore": 87,
        "rawOverallScore": 87,
        "verifiedDisplayScore": 45,
        "displayCategoryScores": {
          "agentic": 93.9,
          "coding": 73.4,
          "reasoning": 88.5,
          "multimodalGrounded": 100,
          "knowledge": 86.7,
          "multilingual": 82.1,
          "instructionFollowing": 81.6,
          "math": 94.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 45.1,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 8,
        "categoryRanks": {
          "agentic": 6,
          "coding": 35,
          "reasoning": 6,
          "multimodalGrounded": 1,
          "knowledge": 9,
          "multilingual": 18,
          "instructionFollowing": 33,
          "math": 6
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "gdpvalAaNormalized": 41.2,
          "gdpvalAa": 1324
        },
        "coding": {},
        "reasoning": {
          "arcAgi2": 45.1,
          "critpt": 25.7
        },
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "grok-4-1",
      "canonicalModelKey": "grok-4-1",
      "model": "Grok 4.1",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 89,
      "rankingEligible": true,
      "overallRank": 9,
      "url": "https://benchlm.ai/models/grok-4-1",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
      "id": 7,
      "releaseDate": "2025-11-17",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-4-1",
        "familyName": "Grok 4.1",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-4-1",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 89,
        "overallScore": 89,
        "rawOverallScore": 89,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 76.6,
          "coding": 81.7,
          "reasoning": 97.6,
          "multimodalGrounded": 97.8,
          "knowledge": 93.7,
          "multilingual": 96.9,
          "instructionFollowing": 90.6,
          "math": 99.4
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 9,
        "categoryRanks": {
          "agentic": 18,
          "coding": 20,
          "reasoning": 1,
          "multimodalGrounded": 3,
          "knowledge": 5,
          "multilingual": 6,
          "instructionFollowing": 19,
          "math": 2
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 37,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-4",
      "canonicalModelKey": "gpt-5-4",
      "model": "GPT-5.4",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1.05M",
      "contextWindowTokens": 1050000,
      "displayScore": 88,
      "rankingEligible": true,
      "overallRank": 10,
      "url": "https://benchlm.ai/models/gpt-5-4",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
      "id": 11,
      "releaseDate": "2026-03-05",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-4",
        "familyName": "GPT-5.4",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-4",
        "relatedModelKeys": [
          "gpt-5-4-pro",
          "gpt-5-4-mini",
          "gpt-5-4-nano"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 88,
        "overallScore": 88,
        "rawOverallScore": 88,
        "verifiedDisplayScore": 66,
        "displayCategoryScores": {
          "agentic": 84.5,
          "coding": 87.2,
          "reasoning": 95.6,
          "multimodalGrounded": 59.6,
          "knowledge": 98.5,
          "multilingual": 100,
          "instructionFollowing": 96,
          "math": 94.4
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 77,
          "coding": 57.7,
          "reasoning": null,
          "multimodalGrounded": 72.7,
          "knowledge": 52.1,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 10,
        "categoryRanks": {
          "agentic": 11,
          "coding": 10,
          "reasoning": 3,
          "multimodalGrounded": 52,
          "knowledge": 3,
          "multilingual": 4,
          "instructionFollowing": 4,
          "math": 7
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 26,
        "verifiedBenchmarkCount": 26,
        "rankableBenchmarkCount": 63,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 4
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 75.1,
          "cyberGym": 79,
          "browseComp": 82.7,
          "osWorldVerified": 75,
          "mcpAtlas": 70.6,
          "toolathlon": 54.6,
          "tau2Bench": 87.1,
          "clawEval": 60.3,
          "deepSearchQa": 73.6,
          "aaAgenticIndex": 67.96,
          "apexAgentsAa": 33.3,
          "gdpvalAaNormalized": 58.7,
          "gdpvalAa": 1674,
          "gertLabs": 64.89
        },
        "coding": {
          "liveCodeBenchPro": 87.5,
          "swePro": 57.7,
          "reactNativeEvals": 85.3,
          "vibeCodeBench": 67.421,
          "aaCodingIndex": 57.25,
          "terminalBenchHard": 57.6,
          "aaSciCode": 56.6
        },
        "reasoning": {
          "lcr": 74,
          "critpt": 23.4
        },
        "multimodalGrounded": {
          "mmmuPro": 81.2,
          "officeQaPro": 53.2,
          "mmmuProPython": 82.1,
          "charxiv": 82.8,
          "erqa": 65.4,
          "simpleVqa": 61.1,
          "screenSpotPro": 85.4,
          "zeroBench": 41,
          "medXpertQaMm": 77.1,
          "gdpvalAa": 1672,
          "aaMmmuPro": 78.4,
          "designArenaWebsite": 1269
        },
        "knowledge": {
          "gpqa": 92.8,
          "hle": 52.1,
          "hleNoTools": 39.8,
          "gpqaDiamond": 92.8,
          "healthBenchHard": 40.1,
          "medXpertQaText": 59.6,
          "artificialAnalysis": 56.8,
          "aaGpqaDiamond": 92,
          "aaHle": 41.6,
          "aaOmniscienceIndex": 5.7,
          "omniscienceAccuracy": 50,
          "omniscienceHallucinationRate": 88.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 73.9
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "qwen3-7-plus",
      "canonicalModelKey": "qwen3-7-plus",
      "model": "Qwen3.7 Plus",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 88,
      "rankingEligible": true,
      "overallRank": 11,
      "url": "https://benchlm.ai/models/qwen3-7-plus",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-plus.md",
      "id": 249,
      "releaseDate": "2026-06-03",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-7-plus",
        "familyName": "Qwen3.7 Plus",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-7-plus",
        "relatedModelKeys": [
          "qwen3-7-max",
          "qwen3-6-plus",
          "qwen3-6-max-preview"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "qwen3-6-plus"
      },
      "scores": {
        "displayScore": 88,
        "overallScore": 86,
        "rawOverallScore": 86,
        "verifiedDisplayScore": 78,
        "displayCategoryScores": {
          "agentic": 85.3,
          "coding": 87.7,
          "reasoning": 100,
          "multimodalGrounded": 76,
          "knowledge": 79.2,
          "multilingual": 79.6,
          "instructionFollowing": 95.6,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 71.7,
          "coding": 71.1,
          "reasoning": 91.7,
          "multimodalGrounded": 81.1,
          "knowledge": 67.9,
          "multilingual": 85.4,
          "instructionFollowing": 89.2,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 11,
        "categoryRanks": {
          "coding": 9,
          "multimodalGrounded": 22,
          "knowledge": 20,
          "multilingual": 21,
          "instructionFollowing": 6
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 50,
        "verifiedBenchmarkCount": 50,
        "rankableBenchmarkCount": 50,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 4
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 70.3,
          "qwenClawBench": 61.8,
          "qwenWebBench": 1536,
          "clawEval": 62.7,
          "bfclV4": 72.9,
          "mcpAtlas": 73.2,
          "vitaBench": 45.6,
          "deepPlanning": 62.3,
          "osWorldVerified": 73.3,
          "androidWorld": 81,
          "aaAgenticIndex": 65.13,
          "apexAgentsAa": 22.4,
          "tau2Bench": 93,
          "gdpvalAaNormalized": 50.9,
          "gdpvalAa": 1518
        },
        "coding": {
          "terminalBench2": 70.3,
          "sweVerified": 77.7,
          "swePro": 57.6,
          "sweMultilingual": 75.8,
          "nl2Repo": 41.1,
          "sciCode": 51.3,
          "liveCodeBench": 89.6,
          "aaCodingIndex": 46.48,
          "terminalBenchHard": 47,
          "aaSciCode": 45.5
        },
        "reasoning": {
          "critpt": 9.1,
          "mrcrv2": 91.7,
          "lcr": 65
        },
        "multimodalGrounded": {
          "mmmuPro": 79,
          "mathVision": 90.3,
          "charxiv": 85.9,
          "erqa": 69.8,
          "medXpertQaMm": 71,
          "screenSpotPro": 79,
          "simpleVqa": 81.7,
          "mmSearchPlus": 41.4,
          "realWorldQa": 86.9,
          "omniDocBench15": 91.4,
          "ocrBenchV2": 70.7,
          "odinw13": 51.1,
          "videoMmeWithSub": 88,
          "videoMmmu": 85.4,
          "mlvuAvg": 87.4,
          "aaMmmuPro": 44.8
        },
        "knowledge": {
          "gpqa": 90.3,
          "gpqaDiamond": 90.3,
          "hle": 34.7,
          "mmluPro": 88.5,
          "mmluRedux": 94.5,
          "superGpqa": 71.4,
          "mmmlu": 89,
          "artificialAnalysis": 53.25,
          "aaGpqaDiamond": 90,
          "aaHle": 33.4,
          "aaOmniscienceIndex": 2.4,
          "omniscienceAccuracy": 22.2,
          "omniscienceHallucinationRate": 25.5
        },
        "multilingual": {
          "mmluProX": 85.4,
          "nova63": 58.8,
          "include": 83,
          "maxife": 88.8,
          "polyMath": 84
        },
        "instructionFollowing": {
          "ifeval": 94.6,
          "ifBench": 79.1,
          "aaIfBench": 78
        },
        "math": {
          "hmmtFeb2026": 92.9,
          "imoAnswerBench": 86,
          "apex": 22.7
        }
      }
    },
    {
      "slug": "claude-opus-4-6",
      "canonicalModelKey": "claude-opus-4-6",
      "model": "Claude Opus 4.6",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 86,
      "rankingEligible": true,
      "overallRank": 12,
      "url": "https://benchlm.ai/models/claude-opus-4-6",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
      "id": 18,
      "releaseDate": "2026-02-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-opus-4-6",
        "familyName": "Claude Opus 4.6",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-opus-4-6",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 86,
        "overallScore": 86,
        "rawOverallScore": 86,
        "verifiedDisplayScore": 72,
        "displayCategoryScores": {
          "agentic": 81.3,
          "coding": 85.5,
          "reasoning": 88.2,
          "multimodalGrounded": 76.7,
          "knowledge": 90.1,
          "multilingual": 100,
          "instructionFollowing": 95.3,
          "math": 86.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 72.6,
          "coding": 64.4,
          "reasoning": null,
          "multimodalGrounded": 77.3,
          "knowledge": 76.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 12,
        "categoryRanks": {
          "agentic": 14,
          "coding": 13,
          "reasoning": 8,
          "multimodalGrounded": 21,
          "knowledge": 8,
          "multilingual": 5,
          "instructionFollowing": 8,
          "math": 18
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 27,
        "verifiedBenchmarkCount": 27,
        "rankableBenchmarkCount": 73,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 4
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 65.4,
          "browseComp": 83.7,
          "osWorldVerified": 72.7,
          "tau2Bench": 84.8,
          "clawEval": 70.4,
          "deepSearchQa": 73.7,
          "cyberGym": 66.6,
          "aaAgenticIndex": 64.22,
          "gdpvalAaNormalized": 54.5,
          "gdpvalAa": 1589,
          "gertLabs": 61.85
        },
        "coding": {
          "sweVerified": 80.84,
          "sweVerifiedArcee": 75.6,
          "liveCodeBenchPro": 70.7,
          "swePro": 53.4,
          "sweRebench": 65.3,
          "reactNativeEvals": 84.1,
          "vibeCodeBench": 57.573,
          "aaCodingIndex": 47.56,
          "terminalBenchHard": 48.5,
          "aaSciCode": 45.7
        },
        "reasoning": {
          "lcr": 58.3,
          "critpt": 2.8
        },
        "multimodalGrounded": {
          "mmmuPro": 77.3,
          "erqa": 51.6,
          "screenSpotPro": 83.1,
          "medXpertQaMm": 64.8,
          "gdpvalAa": 1606,
          "aaMmmuPro": 72.5,
          "designArenaWebsite": 1340
        },
        "knowledge": {
          "gpqa": 91.3,
          "gpqaDiamond": 89.2,
          "superGpqa": 95,
          "mmluPro": 82,
          "mmluProArcee": 89.1,
          "hle": 53,
          "hleNoTools": 40,
          "healthBenchHard": 14.8,
          "medXpertQaText": 52.1,
          "artificialAnalysis": 46.46,
          "aaGpqaDiamond": 84,
          "aaHle": 18.6,
          "aaOmniscienceIndex": 3.5,
          "omniscienceAccuracy": 45.2,
          "omniscienceHallucinationRate": 76
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 44.6
        },
        "math": {
          "aime2025Arcee": 99.8
        }
      }
    },
    {
      "slug": "gemini-3-5-flash",
      "canonicalModelKey": "gemini-3-5-flash",
      "model": "Gemini 3.5 Flash",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 86,
      "rankingEligible": true,
      "overallRank": 13,
      "url": "https://benchlm.ai/models/gemini-3-5-flash",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-5-flash.md",
      "id": 38,
      "releaseDate": "2026-05-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-3-5-flash",
        "familyName": "Gemini 3.5 Flash",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-3-5-flash",
        "relatedModelKeys": [
          "gemini-3-flash",
          "gemini-3-1-pro"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "gemini-3-flash"
      },
      "scores": {
        "displayScore": 86,
        "overallScore": 83,
        "rawOverallScore": 83,
        "verifiedDisplayScore": 70,
        "displayCategoryScores": {
          "agentic": 95.1,
          "coding": 77.3,
          "reasoning": 79.4,
          "multimodalGrounded": 80,
          "knowledge": 82,
          "multilingual": null,
          "instructionFollowing": 78,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 77.2,
          "coding": 54.5,
          "reasoning": 74.7,
          "multimodalGrounded": 83.8,
          "knowledge": 58,
          "multilingual": null,
          "instructionFollowing": 76.3,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 13,
        "categoryRanks": {
          "agentic": 5,
          "coding": 30,
          "reasoning": 17,
          "multimodalGrounded": 17,
          "instructionFollowing": 42
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 20,
        "verifiedBenchmarkCount": 20,
        "rankableBenchmarkCount": 21,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 76.2,
          "mcpAtlas": 83.6,
          "toolathlon": 56.5,
          "osWorldVerified": 78.4,
          "financeAgentV2": 57.861,
          "gdpvalAa": 1656,
          "tau2Bench": 95.3,
          "gdpvalAaNormalized": 57.8,
          "aaAgenticIndex": 70.3,
          "apexAgentsAa": 47.1,
          "gertLabs": 61.85
        },
        "coding": {
          "terminalBench2": 76.2,
          "terminalBenchHard": 40.9,
          "swePro": 55.1,
          "sciCode": 53.1,
          "vibeCodeBench": 48.683,
          "cursorBench31": 49.8,
          "aaCodingIndex": 44.98,
          "aaSciCode": 53.1
        },
        "reasoning": {
          "mrcrv2": 77.3,
          "mrcr1m": 26.6,
          "arcAgi2": 72.1,
          "lcr": 69.3,
          "critpt": 13.1
        },
        "multimodalGrounded": {
          "charxiv": 84.2,
          "mmmuPro": 83.6,
          "blueprintBench2": 33.6,
          "aaMmmuPro": 84.3,
          "designArenaWebsite": 1292
        },
        "knowledge": {
          "artificialAnalysis": 55.33,
          "gpqa": 92.2,
          "gpqaDiamond": 92.676,
          "hle": 40.2,
          "omniscienceAccuracy": 51.9,
          "omniscienceHallucinationRate": 60.7,
          "aaGpqaDiamond": 92.2,
          "aaHle": 41,
          "aaOmniscienceIndex": 22.7
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifBench": 76.3,
          "aaIfBench": 76.3
        },
        "math": {}
      }
    },
    {
      "slug": "deepseek-v4-pro-max",
      "canonicalModelKey": "deepseek-v4-pro-max",
      "model": "DeepSeek V4 Pro (Max)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 86,
      "rankingEligible": true,
      "overallRank": 14,
      "url": "https://benchlm.ai/models/deepseek-v4-pro-max",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-max.md",
      "id": 29,
      "releaseDate": "2026-04-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v4",
        "familyName": "DeepSeek V4",
        "variantType": "pro-reasoning",
        "snapshotLabel": "max",
        "baseFamilyModelKey": "deepseek-v4-pro-max",
        "relatedModelKeys": [
          "deepseek-v4-flash-base",
          "deepseek-v4-pro-base",
          "deepseek-v4-flash",
          "deepseek-v4-flash-high",
          "deepseek-v4-flash-max",
          "deepseek-v4-pro",
          "deepseek-v4-pro-high"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 86,
        "overallScore": 85,
        "rawOverallScore": 85,
        "verifiedDisplayScore": 73,
        "displayCategoryScores": {
          "agentic": 87.7,
          "coding": 89.7,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 76.6,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 74,
          "coding": 75.9,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 66.1,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 14,
        "categoryRanks": {
          "coding": 8,
          "knowledge": 26
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 24,
        "verifiedBenchmarkCount": 24,
        "rankableBenchmarkCount": 25,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 67.9,
          "browseComp": 83.4,
          "hleWithTools": 48.2,
          "mcpAtlas": 73.6,
          "gdpvalAa": 1554,
          "toolathlon": 51.8,
          "aaAgenticIndex": 67.19,
          "apexAgentsAa": 24.3,
          "tau2Bench": 96.2,
          "gdpvalAaNormalized": 52.7
        },
        "coding": {
          "liveCodeBench": 93.5,
          "codeforces": 3206,
          "sweVerified": 80.6,
          "swePro": 55.4,
          "sweMultilingual": 76.2,
          "terminalBench2": 67.9,
          "vibeCodeBench": 49.931,
          "aaCodingIndex": 47.47,
          "terminalBenchHard": 46.2,
          "aaSciCode": 50
        },
        "reasoning": {
          "mrcr1m": 83.5,
          "corpusQa1m": 62,
          "lcr": 66.3,
          "critpt": 12.9
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1286
        },
        "knowledge": {
          "mmluPro": 87.5,
          "simpleQa": 57.9,
          "chineseSimpleQa": 84.4,
          "gpqa": 90.1,
          "gpqaDiamond": 90.1,
          "hle": 37.7,
          "artificialAnalysis": 51.51,
          "aaGpqaDiamond": 88.8,
          "aaHle": 35.9,
          "aaOmniscienceIndex": -10,
          "omniscienceAccuracy": 43.3,
          "omniscienceHallucinationRate": 94
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 76.5
        },
        "math": {
          "hmmtFeb2026": 95.2,
          "imoAnswerBench": 89.8,
          "apex": 38.3,
          "apexShortlist": 90.2
        }
      }
    },
    {
      "slug": "gpt-5-3-codex",
      "canonicalModelKey": "gpt-5-3-codex",
      "model": "GPT-5.3 Codex",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 85,
      "rankingEligible": true,
      "overallRank": 15,
      "url": "https://benchlm.ai/models/gpt-5-3-codex",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
      "id": 5,
      "releaseDate": "2026-02-05",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-3-codex",
        "familyName": "GPT-5.3 Codex",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-3-codex",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 85,
        "overallScore": 91,
        "rawOverallScore": 91,
        "verifiedDisplayScore": 67,
        "displayCategoryScores": {
          "agentic": 77.4,
          "coding": 87.1,
          "reasoning": 93,
          "multimodalGrounded": 95.8,
          "knowledge": 92,
          "multilingual": 96.9,
          "instructionFollowing": 91.4,
          "math": 100
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 71.5,
          "coding": 63.1,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 15,
        "categoryRanks": {
          "agentic": 16,
          "coding": 11,
          "reasoning": 4,
          "multimodalGrounded": 5,
          "knowledge": 6,
          "multilingual": 7,
          "instructionFollowing": 18,
          "math": 1
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 6,
        "verifiedBenchmarkCount": 6,
        "rankableBenchmarkCount": 39,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 77.3,
          "osWorldVerified": 64.7,
          "aaAgenticIndex": 60.54,
          "tau2Bench": 86,
          "gdpvalAaNormalized": 49,
          "gdpvalAa": 1480,
          "gertLabs": 57.47
        },
        "coding": {
          "sweVerified": 85,
          "swePro": 56.8,
          "sweRebench": 58.2,
          "vibeCodeBench": 61.767,
          "aaCodingIndex": 53.1,
          "terminalBenchHard": 53,
          "aaSciCode": 53.2
        },
        "reasoning": {
          "lcr": 74,
          "critpt": 16.9
        },
        "multimodalGrounded": {
          "aaMmmuPro": 78.5,
          "designArenaWebsite": 1208
        },
        "knowledge": {
          "artificialAnalysis": 53.56,
          "aaGpqaDiamond": 91.5,
          "aaHle": 39.9,
          "aaOmniscienceIndex": 9.9,
          "omniscienceAccuracy": 51.8,
          "omniscienceHallucinationRate": 86.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 75.4
        },
        "math": {}
      }
    },
    {
      "slug": "claude-opus-4-7-adaptive",
      "canonicalModelKey": "claude-opus-4-7-max",
      "model": "Claude Opus 4.7 (Adaptive)",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 84,
      "rankingEligible": true,
      "overallRank": 16,
      "url": "https://benchlm.ai/models/claude-opus-4-7-adaptive",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-7-adaptive.md",
      "id": 33,
      "releaseDate": "2026-04-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-opus-4-7",
        "familyName": "Claude Opus 4.7",
        "variantType": "reasoning",
        "snapshotLabel": "adaptive",
        "baseFamilyModelKey": "claude-opus-4-7",
        "relatedModelKeys": [
          "claude-opus-4-7",
          "claude-opus-4-6",
          "claude-opus-4-5"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "claude-opus-4-6"
      },
      "scores": {
        "displayScore": 84,
        "overallScore": 84,
        "rawOverallScore": 84,
        "verifiedDisplayScore": 71,
        "displayCategoryScores": {
          "agentic": 86.2,
          "coding": 93.5,
          "reasoning": 90.7,
          "multimodalGrounded": 45,
          "knowledge": 99.6,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 71.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 74.9,
          "coding": 72.9,
          "reasoning": 75.8,
          "multimodalGrounded": 64.3,
          "knowledge": 68.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 43.8
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 16,
        "categoryRanks": {
          "agentic": 9,
          "coding": 4,
          "multimodalGrounded": 73,
          "knowledge": 1
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 18,
        "verifiedBenchmarkCount": 18,
        "rankableBenchmarkCount": 19,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 69.4,
          "browseComp": 79.3,
          "mcpAtlas": 77.3,
          "osWorldVerified": 78,
          "cyberGym": 73.1,
          "aaAgenticIndex": 71.29,
          "tau2Bench": 88.6,
          "gdpvalAaNormalized": 62.6,
          "gdpvalAa": 1753
        },
        "coding": {
          "sweVerified": 87.6,
          "swePro": 64.3,
          "terminalBench2": 69.4,
          "aaCodingIndex": 52.51,
          "terminalBenchHard": 51.5,
          "aaSciCode": 54.5
        },
        "reasoning": {
          "mrcrv2_128_256": 59.2,
          "arcAgi2": 75.8,
          "lcr": 70.3,
          "critpt": 12
        },
        "multimodalGrounded": {
          "officeQaPro": 43.6,
          "charxiv": 91,
          "charxivNoTools": 82.1,
          "aaMmmuPro": 78.8,
          "designArenaWebsite": 1338
        },
        "knowledge": {
          "gpqa": 94.2,
          "gpqaDiamond": 94.2,
          "hle": 54.7,
          "hleNoTools": 46.9,
          "artificialAnalysis": 57.28,
          "aaGpqaDiamond": 91.4,
          "aaHle": 39.6,
          "aaOmniscienceIndex": 26.2,
          "omniscienceAccuracy": 45.8,
          "omniscienceHallucinationRate": 36.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 58.6
        },
        "math": {
          "frontierMath": 43.8
        }
      }
    },
    {
      "slug": "glm-5-1",
      "canonicalModelKey": "glm-5-1",
      "model": "GLM-5.1",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "203K",
      "contextWindowTokens": 203000,
      "displayScore": 82,
      "rankingEligible": true,
      "overallRank": 17,
      "url": "https://benchlm.ai/models/glm-5-1",
      "markdownUrl": "https://benchlm.ai/md/models/glm-5-1.md",
      "id": 43,
      "releaseDate": "2026-04-07",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-5",
        "familyName": "GLM-5",
        "variantType": "snapshot",
        "snapshotLabel": "5.1",
        "baseFamilyModelKey": "glm-5",
        "relatedModelKeys": [
          "glm-5",
          "glm-5-reasoning",
          "glm-5-turbo"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "glm-5"
      },
      "scores": {
        "displayScore": 82,
        "overallScore": 75,
        "rawOverallScore": 75,
        "verifiedDisplayScore": 61,
        "displayCategoryScores": {
          "agentic": 79.7,
          "coding": 82.8,
          "reasoning": 64.8,
          "multimodalGrounded": null,
          "knowledge": 83.9,
          "multilingual": null,
          "instructionFollowing": 92,
          "math": 89.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 65.3,
          "coding": 60.9,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 52.3,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 17,
        "categoryRanks": {
          "coding": 16,
          "reasoning": 31,
          "knowledge": 11,
          "instructionFollowing": 14,
          "math": 15
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 16,
        "verifiedBenchmarkCount": 16,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 63.5,
          "browseComp": 68,
          "tau3Bench": 70.6,
          "mcpAtlas": 71.8,
          "cyberGym": 68.7,
          "clawEval": 62.3,
          "aaAgenticIndex": 67.05,
          "tau2Bench": 97.7,
          "gdpvalAaNormalized": 51.8,
          "gertLabs": 60.11
        },
        "coding": {
          "swePro": 58.4,
          "nl2Repo": 42.7,
          "sweRebench": 62.7,
          "vibeCodeBench": 31.456,
          "aaCodingIndex": 43.37,
          "terminalBenchHard": 43.2,
          "aaSciCode": 43.8
        },
        "reasoning": {
          "lcr": 62.3,
          "critpt": 4.6
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1315
        },
        "knowledge": {
          "gpqaDiamond": 86.2,
          "hle": 52.3,
          "artificialAnalysis": 51.41,
          "aaGpqaDiamond": 86.8,
          "aaHle": 28,
          "aaOmniscienceIndex": 1.9,
          "omniscienceAccuracy": 24.2,
          "omniscienceHallucinationRate": 29.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 76.3
        },
        "math": {
          "aime2026": 95.3,
          "hmmtNov2025": 94,
          "hmmtFeb2026": 82.6,
          "mmAnswerBench": 83.8
        }
      }
    },
    {
      "slug": "claude-sonnet-4-6",
      "canonicalModelKey": "claude-sonnet-4-6",
      "model": "Claude Sonnet 4.6",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 82,
      "rankingEligible": true,
      "overallRank": 18,
      "url": "https://benchlm.ai/models/claude-sonnet-4-6",
      "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
      "id": 27,
      "releaseDate": "2026-02-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-sonnet-4-6",
        "familyName": "Claude Sonnet 4.6",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-sonnet-4-6",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 82,
        "overallScore": 80,
        "rawOverallScore": 80,
        "verifiedDisplayScore": 69,
        "displayCategoryScores": {
          "agentic": 78.7,
          "coding": 82,
          "reasoning": 82.9,
          "multimodalGrounded": 86.2,
          "knowledge": 81.9,
          "multilingual": 88.4,
          "instructionFollowing": 82.7,
          "math": 75.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 65.1,
          "coding": 66.4,
          "reasoning": null,
          "multimodalGrounded": 77.4,
          "knowledge": 73.7,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 18,
        "categoryRanks": {
          "agentic": 15,
          "coding": 19,
          "reasoning": 13,
          "multimodalGrounded": 12,
          "knowledge": 15,
          "multilingual": 9,
          "instructionFollowing": 31,
          "math": 28
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 14,
        "verifiedBenchmarkCount": 14,
        "rankableBenchmarkCount": 48,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 59.1,
          "osWorldVerified": 72.1,
          "clawEval": 67.8,
          "cyberGym": 65.2,
          "aaAgenticIndex": 61.62,
          "tau2Bench": 79.5,
          "gdpvalAaNormalized": 54.8,
          "gdpvalAa": 1596,
          "gertLabs": 62.92
        },
        "coding": {
          "sweVerified": 79.6,
          "sweRebench": 60.7,
          "reactNativeEvals": 80.6,
          "vibeCodeBench": 51.476,
          "cursorBench31": 48.8,
          "aaCodingIndex": 46.43,
          "terminalBenchHard": 46.2,
          "aaSciCode": 46.9
        },
        "reasoning": {
          "lcr": 57.7,
          "critpt": 0.9
        },
        "multimodalGrounded": {
          "charxiv": 77.4,
          "aaMmmuPro": 70.6,
          "designArenaWebsite": 1327
        },
        "knowledge": {
          "gpqa": 89.9,
          "superGpqa": 95,
          "mmluPro": 79.2,
          "hle": 49,
          "artificialAnalysis": 44.38,
          "aaGpqaDiamond": 79.9,
          "aaHle": 13.2,
          "aaOmniscienceIndex": -2.9,
          "omniscienceAccuracy": 38,
          "omniscienceHallucinationRate": 65.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 41.2
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "deepseek-v4-pro-high",
      "canonicalModelKey": "deepseek-v4-pro-high",
      "model": "DeepSeek V4 Pro (High)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 82,
      "rankingEligible": true,
      "overallRank": 19,
      "url": "https://benchlm.ai/models/deepseek-v4-pro-high",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-high.md",
      "id": 45,
      "releaseDate": "2026-04-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v4",
        "familyName": "DeepSeek V4",
        "variantType": "pro-reasoning",
        "snapshotLabel": "high",
        "baseFamilyModelKey": "deepseek-v4-pro-max",
        "relatedModelKeys": [
          "deepseek-v4-flash-base",
          "deepseek-v4-pro-base",
          "deepseek-v4-flash",
          "deepseek-v4-flash-high",
          "deepseek-v4-flash-max",
          "deepseek-v4-pro",
          "deepseek-v4-pro-max"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 82,
        "overallScore": 80,
        "rawOverallScore": 80,
        "verifiedDisplayScore": 70,
        "displayCategoryScores": {
          "agentic": 82,
          "coding": 86,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 70.9,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 70,
          "coding": 73.8,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 62.6,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 19,
        "categoryRanks": {
          "coding": 12,
          "knowledge": 36
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 23,
        "verifiedBenchmarkCount": 23,
        "rankableBenchmarkCount": 24,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 63.3,
          "browseComp": 80.4,
          "hleWithTools": 44.7,
          "mcpAtlas": 74.2,
          "toolathlon": 49,
          "aaAgenticIndex": 66.65,
          "tau2Bench": 94.2,
          "gdpvalAaNormalized": 52.9,
          "gdpvalAa": 1558
        },
        "coding": {
          "liveCodeBench": 89.8,
          "codeforces": 2919,
          "sweVerified": 79.4,
          "swePro": 54.4,
          "sweMultilingual": 74.1,
          "terminalBench2": 63.3,
          "aaCodingIndex": 43.25,
          "terminalBenchHard": 41.7,
          "aaSciCode": 46.4
        },
        "reasoning": {
          "mrcr1m": 83.3,
          "corpusQa1m": 56.5,
          "lcr": 65,
          "critpt": 10
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1286
        },
        "knowledge": {
          "mmluPro": 87.1,
          "simpleQa": 46.2,
          "chineseSimpleQa": 77.7,
          "gpqa": 89.1,
          "gpqaDiamond": 89.1,
          "hle": 34.5,
          "artificialAnalysis": 49.79,
          "aaGpqaDiamond": 90.5,
          "aaHle": 33.5,
          "aaOmniscienceIndex": -9.7,
          "omniscienceAccuracy": 41.8,
          "omniscienceHallucinationRate": 88.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 71.3
        },
        "math": {
          "hmmtFeb2026": 94,
          "imoAnswerBench": 88,
          "apex": 27.4,
          "apexShortlist": 85.5
        }
      }
    },
    {
      "slug": "o1-preview",
      "canonicalModelKey": "o1-preview",
      "model": "o1-preview",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 82,
      "rankingEligible": true,
      "overallRank": 20,
      "url": "https://benchlm.ai/models/o1-preview",
      "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
      "id": 20,
      "releaseDate": "2024-09-12",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "o1",
        "familyName": "o1",
        "variantType": "snapshot",
        "snapshotLabel": "preview",
        "baseFamilyModelKey": "o1",
        "relatedModelKeys": [
          "o1",
          "o1-pro"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 82,
        "overallScore": 82,
        "rawOverallScore": 82,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 87.9,
          "coding": 78.8,
          "reasoning": 88.6,
          "multimodalGrounded": 67.5,
          "knowledge": 79.8,
          "multilingual": 82,
          "instructionFollowing": 76.9,
          "math": 94.1
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 20,
        "categoryRanks": {
          "agentic": 8,
          "coding": 23,
          "reasoning": 5,
          "multimodalGrounded": 34,
          "knowledge": 19,
          "multilingual": 19,
          "instructionFollowing": 43,
          "math": 8
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "aaCodingIndex": 34.05
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 23.74
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "kimi-2-6",
      "canonicalModelKey": "kimi-2-6",
      "model": "Kimi K2.6",
      "creator": "Moonshot AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 81,
      "rankingEligible": true,
      "overallRank": 21,
      "url": "https://benchlm.ai/models/kimi-2-6",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-2-6.md",
      "id": 44,
      "releaseDate": "2026-04-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "kimi-2-6",
        "familyName": "Kimi K2.6",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "kimi-2-6",
        "relatedModelKeys": [
          "kimi-k2-5"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "kimi-k2-5"
      },
      "scores": {
        "displayScore": 81,
        "overallScore": 81,
        "rawOverallScore": 81,
        "verifiedDisplayScore": 70,
        "displayCategoryScores": {
          "agentic": 82.7,
          "coding": 89.8,
          "reasoning": null,
          "multimodalGrounded": 71.3,
          "knowledge": 73.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 73.1,
          "coding": 72,
          "reasoning": null,
          "multimodalGrounded": 79.7,
          "knowledge": 53.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 21,
        "categoryRanks": {
          "agentic": 12,
          "coding": 7,
          "multimodalGrounded": 29
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 28,
        "verifiedBenchmarkCount": 28,
        "rankableBenchmarkCount": 29,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 66.7,
          "browseComp": 83.2,
          "osWorldVerified": 73.1,
          "toolathlon": 50,
          "mcpAtlas": 55.9,
          "clawEval": 62.3,
          "deepSearchQa": 92.5,
          "wideResearch": 80.8,
          "aaAgenticIndex": 65.97,
          "tau2Bench": 95.9,
          "gdpvalAaNormalized": 49.1,
          "gdpvalAa": 1481,
          "apexAgentsAa": 28.5,
          "gertLabs": 56.82
        },
        "coding": {
          "sweVerified": 80.2,
          "liveCodeBench": 89.6,
          "liveCodeBenchV6": 89.6,
          "swePro": 58.6,
          "sweMultilingual": 76.7,
          "sciCode": 52.2,
          "terminalBench2": 66.7,
          "vibeCodeBench": 37.891,
          "cursorBench31": 47.6,
          "aaCodingIndex": 47.12,
          "terminalBenchHard": 43.9,
          "aaSciCode": 53.5
        },
        "reasoning": {
          "lcr": 69.7,
          "critpt": 8
        },
        "multimodalGrounded": {
          "mmmuPro": 79.4,
          "mmmuProPython": 80.1,
          "charxiv": 80.4,
          "mathVision": 87.4,
          "vStar": 96.9,
          "aaMmmuPro": 79.4,
          "designArenaWebsite": 1322
        },
        "knowledge": {
          "gpqa": 90.5,
          "gpqaDiamond": 90.5,
          "hle": 34.7,
          "artificialAnalysis": 53.9,
          "aaGpqaDiamond": 91.1,
          "aaHle": 35.9,
          "aaOmniscienceIndex": 6.4,
          "omniscienceAccuracy": 32.8,
          "omniscienceHallucinationRate": 39.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 76
        },
        "math": {
          "aime2026": 96.4,
          "hmmtFeb2026": 92.7,
          "mmAnswerBench": 86
        }
      }
    },
    {
      "slug": "gemini-3-pro",
      "canonicalModelKey": "gemini-3-pro",
      "model": "Gemini 3 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 80,
      "rankingEligible": true,
      "overallRank": 22,
      "url": "https://benchlm.ai/models/gemini-3-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
      "id": 34,
      "releaseDate": "2025-11-18",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-3-pro",
        "familyName": "Gemini 3 Pro",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-3-pro",
        "relatedModelKeys": [
          "gemini-3-pro-deep-think"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 80,
        "overallScore": 74,
        "rawOverallScore": 74,
        "verifiedDisplayScore": 52,
        "displayCategoryScores": {
          "agentic": 70.6,
          "coding": 72.4,
          "reasoning": 82.4,
          "multimodalGrounded": 81.2,
          "knowledge": 82.3,
          "multilingual": 79.2,
          "instructionFollowing": 79.5,
          "math": 80.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 31.1,
          "multimodalGrounded": 81.1,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 22,
        "categoryRanks": {
          "agentic": 22,
          "coding": 37,
          "reasoning": 14,
          "multimodalGrounded": 16,
          "knowledge": 13,
          "multilingual": 22,
          "instructionFollowing": 39,
          "math": 23
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 8,
        "verifiedBenchmarkCount": 8,
        "rankableBenchmarkCount": 63,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 51.98,
          "tau2Bench": 87.1,
          "gdpvalAaNormalized": 34.2,
          "gdpvalAa": 1184,
          "gertLabs": 63.23
        },
        "coding": {
          "vibeCodeBench": 14.3,
          "aaCodingIndex": 46.49,
          "terminalBenchHard": 41.7,
          "aaSciCode": 56.1
        },
        "reasoning": {
          "arcAgi2": 31.1,
          "lcr": 70.7,
          "critpt": 9.1
        },
        "multimodalGrounded": {
          "mmmuPro": 81,
          "mathVision": 86.6,
          "videoMmmu": 87.6,
          "screenSpotPro": 72.7,
          "charxiv": 81.4,
          "vStar": 88,
          "aaMmmuPro": 80.2
        },
        "knowledge": {
          "artificialAnalysis": 48.39,
          "aaGpqaDiamond": 90.8,
          "aaHle": 37.2,
          "aaOmniscienceIndex": 15.8,
          "omniscienceAccuracy": 55.9,
          "omniscienceHallucinationRate": 90.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 70.4
        },
        "math": {}
      }
    },
    {
      "slug": "minimax-m3",
      "canonicalModelKey": "minimax-m3",
      "model": "MiniMax M3",
      "creator": "MiniMax",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 79,
      "rankingEligible": true,
      "overallRank": 23,
      "url": "https://benchlm.ai/models/minimax-m3",
      "markdownUrl": "https://benchlm.ai/md/models/minimax-m3.md",
      "id": 241,
      "releaseDate": "2026-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "minimax-m3",
        "familyName": "MiniMax M3",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "minimax-m3",
        "relatedModelKeys": [
          "minimax-m2-7",
          "minimax-m2-5"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "minimax-m2-7"
      },
      "scores": {
        "displayScore": 79,
        "overallScore": 76,
        "rawOverallScore": 76,
        "verifiedDisplayScore": 69,
        "displayCategoryScores": {
          "agentic": 85.3,
          "coding": 84.6,
          "reasoning": null,
          "multimodalGrounded": 48.1,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 71.9,
          "coding": 67,
          "reasoning": null,
          "multimodalGrounded": 64.9,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 23,
        "categoryRanks": {
          "agentic": 10,
          "multimodalGrounded": 70
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 15,
        "verifiedBenchmarkCount": 15,
        "rankableBenchmarkCount": 16,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 66,
          "browseComp": 83.52,
          "osWorldVerified": 70.06,
          "mcpAtlas": 74.2,
          "clawEval": 74.5,
          "aaAgenticIndex": 68.62,
          "tau2Bench": 88.9,
          "gdpvalAaNormalized": 58.5,
          "gdpvalAa": 1670,
          "gdpvalRubrics": 74.7,
          "bankerToolBench": 76.1
        },
        "coding": {
          "sweVerified": 80.5,
          "swePro": 59,
          "terminalBench2": 66,
          "nl2Repo": 42.13,
          "aaCodingIndex": 43.41,
          "terminalBenchHard": 42.4,
          "aaSciCode": 45.4,
          "vibeV2": 50.1,
          "svgBench": 63.7,
          "kernelBenchHard": 28.8
        },
        "reasoning": {
          "lcr": 74,
          "critpt": 3.7
        },
        "multimodalGrounded": {
          "officeQaPro": 45.1,
          "omniDocBench15": 91.6,
          "mmmuPro": 78.1,
          "videoMmmu": 84.6,
          "videoMmeWithSub": 85.4,
          "aaMmmuPro": 79.9,
          "designArenaWebsite": 1312
        },
        "knowledge": {
          "artificialAnalysis": 54.67,
          "aaGpqaDiamond": 92.9,
          "aaHle": 37.1,
          "aaOmniscienceIndex": 1.4,
          "omniscienceAccuracy": 15,
          "omniscienceHallucinationRate": 16.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 82.9
        },
        "math": {
          "usamo2026": 85.71
        }
      }
    },
    {
      "slug": "glm-5-reasoning",
      "canonicalModelKey": "glm-5-reasoning",
      "model": "GLM-5 (Reasoning)",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 79,
      "rankingEligible": true,
      "overallRank": 24,
      "url": "https://benchlm.ai/models/glm-5-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
      "id": 22,
      "releaseDate": "2026-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-5",
        "familyName": "GLM-5",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-5",
        "relatedModelKeys": [
          "glm-5"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 79,
        "overallScore": 80,
        "rawOverallScore": 80,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 77,
          "coding": 69.1,
          "reasoning": 87.4,
          "multimodalGrounded": 72.6,
          "knowledge": 81.4,
          "multilingual": 79.2,
          "instructionFollowing": 81,
          "math": 92.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 24,
        "categoryRanks": {
          "agentic": 17,
          "coding": 41,
          "reasoning": 11,
          "multimodalGrounded": 26,
          "knowledge": 17,
          "multilingual": 23,
          "instructionFollowing": 35,
          "math": 11
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 41,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "vibeCodeBench": 23.359
        },
        "reasoning": {},
        "multimodalGrounded": {
          "designArenaWebsite": 1292
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-2",
      "canonicalModelKey": "gpt-5-2",
      "model": "GPT-5.2",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 78,
      "rankingEligible": true,
      "overallRank": 25,
      "url": "https://benchlm.ai/models/gpt-5-2",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
      "id": 26,
      "releaseDate": "2025-12-11",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-2",
        "familyName": "GPT-5.2",
        "variantType": "thinking",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-2",
        "relatedModelKeys": [
          "gpt-5-2-instant",
          "gpt-5-2-pro"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 78,
        "overallScore": 78,
        "rawOverallScore": 78,
        "verifiedDisplayScore": 66,
        "displayCategoryScores": {
          "agentic": 59.2,
          "coding": 78.7,
          "reasoning": 83.5,
          "multimodalGrounded": 81.5,
          "knowledge": 91.1,
          "multilingual": 95.9,
          "instructionFollowing": 84.9,
          "math": 80.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 55.2,
          "coding": 64.7,
          "reasoning": 52.9,
          "multimodalGrounded": 80.3,
          "knowledge": 92.4,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 25,
        "categoryRanks": {
          "agentic": 31,
          "coding": 24,
          "reasoning": 12,
          "multimodalGrounded": 15,
          "knowledge": 7,
          "multilingual": 8,
          "instructionFollowing": 25,
          "math": 22
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 11,
        "verifiedBenchmarkCount": 11,
        "rankableBenchmarkCount": 57,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "browseComp": 65.8,
          "osWorldVerified": 47.3,
          "aaAgenticIndex": 60.2,
          "tau2Bench": 84.8,
          "gdpvalAaNormalized": 48.3,
          "gdpvalAa": 1467,
          "gertLabs": 46.54
        },
        "coding": {
          "sweVerified": 80,
          "swePro": 55.6,
          "vibeCodeBench": 53.499,
          "aaCodingIndex": 48.67,
          "terminalBenchHard": 47,
          "aaSciCode": 52.1
        },
        "reasoning": {
          "arcAgi2": 52.9,
          "lcr": 72.7,
          "critpt": 11.6
        },
        "multimodalGrounded": {
          "mmmuPro": 79.5,
          "mathVision": 83,
          "charxiv": 82.1,
          "vStar": 75.9,
          "designArenaWebsite": 1240
        },
        "knowledge": {
          "gpqa": 92.4,
          "artificialAnalysis": 51.28,
          "aaGpqaDiamond": 90.3,
          "aaHle": 35.4,
          "aaOmniscienceIndex": -1,
          "omniscienceAccuracy": 43.8,
          "omniscienceHallucinationRate": 79.7
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 75.4
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "qwen3-5-397b-reasoning",
      "canonicalModelKey": "qwen3-5-397b-reasoning",
      "model": "Qwen3.5 397B (Reasoning)",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 77,
      "rankingEligible": true,
      "overallRank": 26,
      "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
      "id": 28,
      "releaseDate": "2026-02-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-5-397b",
        "familyName": "Qwen3.5 397B",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-5-397b",
        "relatedModelKeys": [
          "qwen3-5-397b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 77,
        "overallScore": 77,
        "rawOverallScore": 77,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 68.8,
          "coding": 85.4,
          "reasoning": 81.7,
          "multimodalGrounded": 58.4,
          "knowledge": 78,
          "multilingual": 82.9,
          "instructionFollowing": 80.6,
          "math": 92.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 26,
        "categoryRanks": {
          "agentic": 23,
          "reasoning": 15,
          "multimodalGrounded": 56,
          "knowledge": 23,
          "multilingual": 15,
          "instructionFollowing": 37,
          "math": 10
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 55.83,
          "apexAgentsAa": 15.3,
          "tau2Bench": 95.6,
          "gdpvalAaNormalized": 34.5,
          "gdpvalAa": 1190
        },
        "coding": {
          "aaCodingIndex": 41.28,
          "terminalBenchHard": 40.9,
          "aaSciCode": 42
        },
        "reasoning": {
          "lcr": 65.7,
          "critpt": 1.7
        },
        "multimodalGrounded": {
          "aaMmmuPro": 77.3
        },
        "knowledge": {
          "artificialAnalysis": 45.05,
          "aaGpqaDiamond": 89.3,
          "aaHle": 27.3,
          "aaOmniscienceIndex": -29.8,
          "omniscienceAccuracy": 31.4,
          "omniscienceHallucinationRate": 89.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 78.8
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-5-1",
      "canonicalModelKey": "gpt-5-1",
      "model": "GPT-5.1",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 77,
      "rankingEligible": true,
      "overallRank": 27,
      "url": "https://benchlm.ai/models/gpt-5-1",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
      "id": 23,
      "releaseDate": "2025-11-13",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-1",
        "familyName": "GPT-5.1",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-1",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 77,
        "overallScore": 81,
        "rawOverallScore": 81,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 74.1,
          "coding": 76,
          "reasoning": 66.7,
          "multimodalGrounded": 96.3,
          "knowledge": 81.5,
          "multilingual": 82.8,
          "instructionFollowing": 75.8,
          "math": 68.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 27,
        "categoryRanks": {
          "agentic": 20,
          "coding": 32,
          "reasoning": 29,
          "multimodalGrounded": 4,
          "knowledge": 16,
          "multilingual": 16,
          "instructionFollowing": 48,
          "math": 36
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 38,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 51.26,
          "tau2Bench": 81.9,
          "gdpvalAaNormalized": 36.4,
          "gdpvalAa": 1227,
          "gertLabs": 41.24
        },
        "coding": {
          "vibeCodeBench": 24.606,
          "aaCodingIndex": 44.73,
          "terminalBenchHard": 45.5,
          "aaSciCode": 43.3
        },
        "reasoning": {
          "lcr": 75,
          "critpt": 4.9
        },
        "multimodalGrounded": {
          "aaMmmuPro": 75.5,
          "designArenaWebsite": 1233
        },
        "knowledge": {
          "artificialAnalysis": 47.7,
          "aaGpqaDiamond": 87.3,
          "aaHle": 26.5,
          "aaOmniscienceIndex": 5.6,
          "omniscienceAccuracy": 37.6,
          "omniscienceHallucinationRate": 51.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 72.9
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "claude-opus-4-5",
      "canonicalModelKey": "claude-opus-4-5",
      "model": "Claude Opus 4.5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 76,
      "rankingEligible": true,
      "overallRank": 28,
      "url": "https://benchlm.ai/models/claude-opus-4-5",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
      "id": 36,
      "releaseDate": "2025-11-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-opus-4-5",
        "familyName": "Claude Opus 4.5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-opus-4-5",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 76,
        "overallScore": 72,
        "rawOverallScore": 72,
        "verifiedDisplayScore": 68,
        "displayCategoryScores": {
          "agentic": 73.5,
          "coding": 75.6,
          "reasoning": 69.6,
          "multimodalGrounded": 61.2,
          "knowledge": 83,
          "multilingual": 81.4,
          "instructionFollowing": 63.2,
          "math": 94.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 62.5,
          "coding": 65.9,
          "reasoning": 64.4,
          "multimodalGrounded": 70,
          "knowledge": 66.2,
          "multilingual": 85.7,
          "instructionFollowing": 79.4,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 28,
        "categoryRanks": {
          "agentic": 21,
          "coding": 34,
          "reasoning": 25,
          "multimodalGrounded": 51,
          "knowledge": 12,
          "multilingual": 20,
          "instructionFollowing": 63,
          "math": 5
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 41,
        "verifiedBenchmarkCount": 41,
        "rankableBenchmarkCount": 82,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 4
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 59.3,
          "osWorldVerified": 66.3,
          "osWorld": 66.3,
          "clawEval": 59.6,
          "qwenClawBench": 52.3,
          "tau3Bench": 70.2,
          "vitaBench": 23.3,
          "deepPlanning": 26.4,
          "toolathlon": 43.5,
          "mcpAtlas": 42.3,
          "mcpTasks": 71.8,
          "wideResearch": 76.4,
          "cyberGym": 50.6,
          "aaAgenticIndex": 59.22,
          "tau2Bench": 86.3,
          "gdpvalAaNormalized": 45.9,
          "gdpvalAa": 1418,
          "gertLabs": 64.23
        },
        "coding": {
          "sweVerified": 80.9,
          "liveCodeBenchV6": 84.8,
          "swePro": 57.1,
          "sweMultilingual": 77.5,
          "nl2Repo": 43.2,
          "aaCodingIndex": 42.94,
          "terminalBenchHard": 40.9,
          "aaSciCode": 47
        },
        "reasoning": {
          "longBenchV2": 64.4,
          "aiNeedle": 74,
          "lcr": 65.3,
          "critpt": 0.3
        },
        "multimodalGrounded": {
          "mmmuPro": 70.6,
          "mathVision": 74.3,
          "charxiv": 68.5,
          "videoMmmu": 84.4,
          "screenSpotPro": 45.7,
          "vStar": 67,
          "aaMmmuPro": 71.2,
          "designArenaWebsite": 1292
        },
        "knowledge": {
          "gpqa": 87,
          "superGpqa": 70.6,
          "mmluPro": 89.5,
          "mmluRedux": 96.6,
          "cEval": 92.2,
          "hle": 30.8,
          "artificialAnalysis": 43.09,
          "aaGpqaDiamond": 81,
          "aaHle": 12.9,
          "aaOmniscienceIndex": -3.9,
          "omniscienceAccuracy": 40.7,
          "omniscienceHallucinationRate": 75.4
        },
        "multilingual": {
          "mmluProX": 85.7,
          "nova63": 56.7
        },
        "instructionFollowing": {
          "ifeval": 90.9,
          "ifBench": 58,
          "aaIfBench": 43
        },
        "math": {
          "aime2026": 95.1,
          "hmmtFeb2025": 92.9,
          "hmmtNov2025": 93.3,
          "hmmtFeb2026": 85.3,
          "mmAnswerBench": 84
        }
      }
    },
    {
      "slug": "gpt-5-high",
      "canonicalModelKey": "gpt-5-high",
      "model": "GPT-5 (high)",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 76,
      "rankingEligible": true,
      "overallRank": 29,
      "url": "https://benchlm.ai/models/gpt-5-high",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
      "id": 19,
      "releaseDate": "2025-08-07",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5",
        "familyName": "GPT-5",
        "variantType": "reasoning",
        "snapshotLabel": "high",
        "baseFamilyModelKey": "gpt-5-high",
        "relatedModelKeys": [
          "gpt-5-medium",
          "gpt-5-mini",
          "gpt-5-nano"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 76,
        "overallScore": 83,
        "rawOverallScore": 83,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 74.8,
          "coding": 69.7,
          "reasoning": 76.3,
          "multimodalGrounded": 93.3,
          "knowledge": 78.9,
          "multilingual": 79.2,
          "instructionFollowing": 80.7,
          "math": 70.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 29,
        "categoryRanks": {
          "agentic": 19,
          "coding": 40,
          "reasoning": 19,
          "multimodalGrounded": 7,
          "knowledge": 21,
          "multilingual": 24,
          "instructionFollowing": 36,
          "math": 35
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 37,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 54.65,
          "tau2Bench": 84.8,
          "gdpvalAaNormalized": 39.6,
          "gdpvalAa": 1292
        },
        "coding": {
          "vibeCodeBench": 20.088,
          "aaCodingIndex": 36.03,
          "terminalBenchHard": 32.6,
          "aaSciCode": 42.9
        },
        "reasoning": {
          "lcr": 75.6,
          "critpt": 5.7
        },
        "multimodalGrounded": {
          "aaMmmuPro": 74.2,
          "designArenaWebsite": 1230
        },
        "knowledge": {
          "artificialAnalysis": 44.63,
          "aaGpqaDiamond": 85.4,
          "aaHle": 26.5,
          "aaOmniscienceIndex": -8.1,
          "omniscienceAccuracy": 40.7,
          "omniscienceHallucinationRate": 82.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 73.1
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-5-2-codex",
      "canonicalModelKey": "gpt-5-2-codex",
      "model": "GPT-5.2-Codex",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 76,
      "rankingEligible": true,
      "overallRank": 30,
      "url": "https://benchlm.ai/models/gpt-5-2-codex",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
      "id": 4,
      "releaseDate": "2025-12-18",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-2-codex",
        "familyName": "GPT-5.2-Codex",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-2-codex",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 76,
        "overallScore": 90,
        "rawOverallScore": 90,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 77.5,
          "coding": 77.5,
          "reasoning": 87.8,
          "multimodalGrounded": 89,
          "knowledge": 77.4,
          "multilingual": 84.8,
          "instructionFollowing": 91.6,
          "math": 97.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 30,
        "categoryRanks": {
          "coding": 29,
          "reasoning": 10,
          "multimodalGrounded": 11,
          "knowledge": 25,
          "multilingual": 11,
          "instructionFollowing": 16,
          "math": 3
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 31,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 56.52,
          "tau2Bench": 92.1,
          "gdpvalAaNormalized": 39.4,
          "gdpvalAa": 1288,
          "gertLabs": 51.79
        },
        "coding": {
          "vibeCodeBench": 37.912,
          "aaCodingIndex": 42.96,
          "terminalBenchHard": 37.1,
          "aaSciCode": 54.6
        },
        "reasoning": {
          "lcr": 75.7,
          "critpt": 8.7
        },
        "multimodalGrounded": {
          "aaMmmuPro": 76.3
        },
        "knowledge": {
          "artificialAnalysis": 49.03,
          "aaGpqaDiamond": 89.9,
          "aaHle": 33.5,
          "aaOmniscienceIndex": -2.5,
          "omniscienceAccuracy": 40.7,
          "omniscienceHallucinationRate": 72.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 77.6
        },
        "math": {}
      }
    },
    {
      "slug": "kimi-k2-5-reasoning",
      "canonicalModelKey": "kimi-k2-5-reasoning",
      "model": "Kimi K2.5 (Reasoning)",
      "creator": "Moonshot AI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 75,
      "rankingEligible": true,
      "overallRank": 31,
      "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
      "id": 35,
      "releaseDate": "2026-02-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "kimi-k2-5",
        "familyName": "Kimi K2.5",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "kimi-k2-5",
        "relatedModelKeys": [
          "kimi-k2-5"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 75,
        "overallScore": 72,
        "rawOverallScore": 72,
        "verifiedDisplayScore": 73,
        "displayCategoryScores": {
          "agentic": 62,
          "coding": 85,
          "reasoning": 69.2,
          "multimodalGrounded": 67.8,
          "knowledge": 74,
          "multilingual": 87.8,
          "instructionFollowing": 98.9,
          "math": 67.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 54.6,
          "coding": 76.8,
          "reasoning": null,
          "multimodalGrounded": 78.5,
          "knowledge": 87.3,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 96.1
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 31,
        "categoryRanks": {
          "agentic": 25,
          "coding": 14,
          "reasoning": 26,
          "multimodalGrounded": 32,
          "knowledge": 30,
          "multilingual": 10,
          "instructionFollowing": 3,
          "math": 38
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 8,
        "verifiedBenchmarkCount": 8,
        "rankableBenchmarkCount": 39,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 50.8,
          "browseComp": 60.6,
          "aaAgenticIndex": 58.94,
          "apexAgentsAa": 11.5,
          "tau2Bench": 95.9,
          "gdpvalAaNormalized": 39.2,
          "gdpvalAa": 1284,
          "gertLabs": 32.58
        },
        "coding": {
          "sweVerified": 76.8,
          "vibeCodeBench": 17.536,
          "aaCodingIndex": 39.55,
          "terminalBenchHard": 34.8,
          "aaSciCode": 49
        },
        "reasoning": {
          "lcr": 65.3,
          "critpt": 3.1
        },
        "multimodalGrounded": {
          "mmmuPro": 78.5,
          "aaMmmuPro": 75.4,
          "designArenaWebsite": 1294
        },
        "knowledge": {
          "gpqa": 87.6,
          "mmluPro": 87.1,
          "artificialAnalysis": 46.81,
          "aaGpqaDiamond": 87.9,
          "aaHle": 29.4,
          "aaOmniscienceIndex": -8.1,
          "omniscienceAccuracy": 34.3,
          "omniscienceHallucinationRate": 64.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 70.2
        },
        "math": {
          "aime2025": 96.1
        }
      }
    },
    {
      "slug": "gpt-5-1-codex-max",
      "canonicalModelKey": "gpt-5-1-codex-max",
      "model": "GPT-5.1-Codex-Max",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 75,
      "rankingEligible": true,
      "overallRank": 32,
      "url": "https://benchlm.ai/models/gpt-5-1-codex-max",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex-max.md",
      "id": 1,
      "releaseDate": "2025-11-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-1-codex-max",
        "familyName": "GPT-5.1-Codex-Max",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-1-codex-max",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 75,
        "overallScore": 90,
        "rawOverallScore": 90,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 75.5,
          "coding": 75.2,
          "reasoning": 88.4,
          "multimodalGrounded": 90.1,
          "knowledge": 78.2,
          "multilingual": 82.8,
          "instructionFollowing": 87.9,
          "math": 97.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 32,
        "categoryRanks": {
          "reasoning": 7,
          "multimodalGrounded": 9,
          "knowledge": 22,
          "multilingual": 17,
          "instructionFollowing": 21,
          "math": 4
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 30,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 50.68,
          "tau2Bench": 83,
          "gdpvalAaNormalized": 34.5,
          "gdpvalAa": 1191
        },
        "coding": {
          "vibeCodeBench": 22.168,
          "aaCodingIndex": 36.62,
          "terminalBenchHard": 34.8,
          "aaSciCode": 40.2
        },
        "reasoning": {
          "lcr": 67.3,
          "critpt": 5.7
        },
        "multimodalGrounded": {
          "aaMmmuPro": 72.5
        },
        "knowledge": {
          "artificialAnalysis": 43.11,
          "aaGpqaDiamond": 86,
          "aaHle": 23.4,
          "aaOmniscienceIndex": -6,
          "omniscienceAccuracy": 39.2,
          "omniscienceHallucinationRate": 74.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 70
        },
        "math": {}
      }
    },
    {
      "slug": "deepseek-v4-flash-max",
      "canonicalModelKey": "deepseek-v4-flash-max",
      "model": "DeepSeek V4 Flash (Max)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 74,
      "rankingEligible": true,
      "overallRank": 33,
      "url": "https://benchlm.ai/models/deepseek-v4-flash-max",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-max.md",
      "id": 64,
      "releaseDate": "2026-04-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v4",
        "familyName": "DeepSeek V4",
        "variantType": "flash-reasoning",
        "snapshotLabel": "max",
        "baseFamilyModelKey": "deepseek-v4-pro-max",
        "relatedModelKeys": [
          "deepseek-v4-flash-base",
          "deepseek-v4-pro-base",
          "deepseek-v4-flash",
          "deepseek-v4-flash-high",
          "deepseek-v4-pro",
          "deepseek-v4-pro-high",
          "deepseek-v4-pro-max"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 74,
        "overallScore": 72,
        "rawOverallScore": 72,
        "verifiedDisplayScore": 66,
        "displayCategoryScores": {
          "agentic": 68.8,
          "coding": 82.2,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 66.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 63.3,
          "coding": 73.7,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 60,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 33,
        "categoryRanks": {
          "coding": 18,
          "knowledge": 39
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 23,
        "verifiedBenchmarkCount": 23,
        "rankableBenchmarkCount": 24,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 56.9,
          "browseComp": 73.2,
          "hleWithTools": 45.1,
          "mcpAtlas": 69,
          "gdpvalAa": 1388,
          "toolathlon": 47.8,
          "aaAgenticIndex": 61.28,
          "tau2Bench": 95,
          "gdpvalAaNormalized": 44.4
        },
        "coding": {
          "liveCodeBench": 91.6,
          "codeforces": 3052,
          "sweVerified": 79,
          "swePro": 52.6,
          "sweMultilingual": 73.3,
          "terminalBench2": 56.9,
          "aaCodingIndex": 38.71,
          "terminalBenchHard": 35.6,
          "aaSciCode": 44.9
        },
        "reasoning": {
          "mrcr1m": 78.7,
          "corpusQa1m": 60.5,
          "lcr": 63,
          "critpt": 7.1
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1259
        },
        "knowledge": {
          "mmluPro": 86.2,
          "simpleQa": 34.1,
          "chineseSimpleQa": 78.9,
          "gpqa": 88.1,
          "gpqaDiamond": 88.1,
          "hle": 34.8,
          "artificialAnalysis": 46.52,
          "aaGpqaDiamond": 89.4,
          "aaHle": 32.1,
          "aaOmniscienceIndex": -22.9,
          "omniscienceAccuracy": 37.2,
          "omniscienceHallucinationRate": 95.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 79.2
        },
        "math": {
          "hmmtFeb2026": 94.8,
          "imoAnswerBench": 88.4,
          "apex": 33,
          "apexShortlist": 85.7
        }
      }
    },
    {
      "slug": "qwen3-6-27b",
      "canonicalModelKey": "qwen3-6-27b",
      "model": "Qwen3.6-27B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 72,
      "rankingEligible": true,
      "overallRank": 34,
      "url": "https://benchlm.ai/models/qwen3-6-27b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-27b.md",
      "id": 63,
      "releaseDate": "2026-04-21",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-6-27b",
        "familyName": "Qwen3.6-27B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-6-27b",
        "relatedModelKeys": [
          "qwen3-6-35b-a3b",
          "qwen3-5-27b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 72,
        "overallScore": 70,
        "rawOverallScore": 70,
        "verifiedDisplayScore": 66,
        "displayCategoryScores": {
          "agentic": 69.5,
          "coding": 77.9,
          "reasoning": null,
          "multimodalGrounded": 65,
          "knowledge": 68.7,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 59.3,
          "coding": 70.6,
          "reasoning": null,
          "multimodalGrounded": 76.6,
          "knowledge": 62.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 34,
        "categoryRanks": {
          "coding": 26,
          "multimodalGrounded": 43,
          "knowledge": 38
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 37,
        "verifiedBenchmarkCount": 37,
        "rankableBenchmarkCount": 37,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 59.3,
          "clawEval": 72.4,
          "qwenClawBench": 53.4,
          "qwenWebBench": 1487,
          "androidWorld": 70.3,
          "aaAgenticIndex": 62.85,
          "tau2Bench": 94.2,
          "gdpvalAaNormalized": 45.2,
          "gdpvalAa": 1403,
          "gertLabs": 54.84
        },
        "coding": {
          "sweVerified": 77.2,
          "sweMultilingual": 71.3,
          "swePro": 53.5,
          "terminalBench2": 59.3,
          "liveCodeBench": 83.9,
          "nl2Repo": 36.2,
          "aaCodingIndex": 36.5,
          "terminalBenchHard": 34.8,
          "aaSciCode": 39.8
        },
        "reasoning": {
          "lcr": 68.7,
          "critpt": 1.1
        },
        "multimodalGrounded": {
          "mmmu": 82.9,
          "mmmuPro": 75.8,
          "realWorldQa": 84.1,
          "dynaMath": 85.6,
          "mStar": 81.4,
          "simpleVqa": 56.1,
          "charxiv": 78.4,
          "ccOcr": 81.2,
          "countBench": 97.8,
          "refcocoAvg": 92.5,
          "erqa": 62.5,
          "videoMmeWithSub": 87.7,
          "videoMmmu": 84.4,
          "mlvuAvg": 86.6,
          "vStar": 94.7,
          "aaMmmuPro": 74.6
        },
        "knowledge": {
          "mmluPro": 86.2,
          "mmluRedux": 93.5,
          "superGpqa": 66,
          "cEval": 91.4,
          "gpqa": 87.8,
          "hle": 24,
          "artificialAnalysis": 45.82,
          "aaGpqaDiamond": 84.2,
          "aaHle": 21.6,
          "aaOmniscienceIndex": -19.8,
          "omniscienceAccuracy": 19.2,
          "omniscienceHallucinationRate": 48.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 67.6
        },
        "math": {
          "hmmtFeb2025": 93.8,
          "hmmtNov2025": 90.7,
          "hmmtFeb2026": 84.3,
          "mmAnswerBench": 80.8,
          "aime2026": 94.1
        }
      }
    },
    {
      "slug": "grok-4-20-beta",
      "canonicalModelKey": "grok-4-20-beta",
      "model": "Grok 4.20",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 71,
      "rankingEligible": true,
      "overallRank": 35,
      "url": "https://benchlm.ai/models/grok-4-20-beta",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-20-beta.md",
      "id": 85,
      "releaseDate": "2026-03-10",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-4-20",
        "familyName": "Grok 4.20",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-4-20-beta",
        "relatedModelKeys": [
          "grok-4-20-multi-agent-beta"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "grok-4-1"
      },
      "scores": {
        "displayScore": 71,
        "overallScore": 60,
        "rawOverallScore": 60,
        "verifiedDisplayScore": 57,
        "displayCategoryScores": {
          "agentic": 54.5,
          "coding": 76.4,
          "reasoning": 64.7,
          "multimodalGrounded": 47.6,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 95.4,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 47.1,
          "coding": 61,
          "reasoning": 53.3,
          "multimodalGrounded": 70.8,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 35,
        "categoryRanks": {
          "multimodalGrounded": 71,
          "instructionFollowing": 7
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 16,
        "verifiedBenchmarkCount": 16,
        "rankableBenchmarkCount": 18,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 47.1,
          "deepSearchQa": 62.8,
          "gertLabs": 38.36
        },
        "coding": {
          "liveCodeBenchPro": 74.2,
          "sweVerified": 76.7,
          "swePro": 51.8,
          "vibeCodeBench": 4.064
        },
        "reasoning": {
          "arcAgi2": 53.3
        },
        "multimodalGrounded": {
          "mmmuPro": 75.2,
          "charxiv": 60.9,
          "erqa": 54.1,
          "simpleVqa": 57.4,
          "medXpertQaMm": 65.8,
          "gdpvalAa": 1055
        },
        "knowledge": {
          "gpqaDiamond": 88.5,
          "hleNoTools": 31.6,
          "healthBenchHard": 20.3,
          "medXpertQaText": 50.2
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "deepseek-v4-flash-high",
      "canonicalModelKey": "deepseek-v4-flash-high",
      "model": "DeepSeek V4 Flash (High)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 71,
      "rankingEligible": true,
      "overallRank": 36,
      "url": "https://benchlm.ai/models/deepseek-v4-flash-high",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-high.md",
      "id": 83,
      "releaseDate": "2026-04-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v4",
        "familyName": "DeepSeek V4",
        "variantType": "flash-reasoning",
        "snapshotLabel": "high",
        "baseFamilyModelKey": "deepseek-v4-pro-max",
        "relatedModelKeys": [
          "deepseek-v4-flash-base",
          "deepseek-v4-pro-base",
          "deepseek-v4-flash",
          "deepseek-v4-flash-max",
          "deepseek-v4-pro",
          "deepseek-v4-pro-high",
          "deepseek-v4-pro-max"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 71,
        "overallScore": 65,
        "rawOverallScore": 65,
        "verifiedDisplayScore": 62,
        "displayCategoryScores": {
          "agentic": 60.6,
          "coding": 82.5,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 62.1,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 55.4,
          "coding": 72.2,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 57.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 36,
        "categoryRanks": {
          "coding": 17,
          "knowledge": 50
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 23,
        "verifiedBenchmarkCount": 23,
        "rankableBenchmarkCount": 24,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 56.6,
          "browseComp": 53.5,
          "hleWithTools": 40.3,
          "mcpAtlas": 67.4,
          "toolathlon": 43.5,
          "aaAgenticIndex": 62.33,
          "tau2Bench": 95.6,
          "gdpvalAaNormalized": 45.7,
          "gdpvalAa": 1414
        },
        "coding": {
          "liveCodeBench": 88.4,
          "codeforces": 2816,
          "sweVerified": 78.6,
          "swePro": 52.3,
          "sweMultilingual": 70.2,
          "terminalBench2": 56.6,
          "aaCodingIndex": 39.76,
          "terminalBenchHard": 38.6,
          "aaSciCode": 42
        },
        "reasoning": {
          "mrcr1m": 76.9,
          "corpusQa1m": 59.3,
          "lcr": 62.7,
          "critpt": 3.4
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1259
        },
        "knowledge": {
          "mmluPro": 86.4,
          "simpleQa": 28.9,
          "chineseSimpleQa": 73.2,
          "gpqa": 87.4,
          "gpqaDiamond": 87.4,
          "hle": 29.4,
          "artificialAnalysis": 46,
          "aaGpqaDiamond": 86.7,
          "aaHle": 27.8,
          "aaOmniscienceIndex": -22.3,
          "omniscienceAccuracy": 35.5,
          "omniscienceHallucinationRate": 89.7
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 73.5
        },
        "math": {
          "hmmtFeb2026": 91.9,
          "imoAnswerBench": 85.1,
          "apex": 19.1,
          "apexShortlist": 72.1
        }
      }
    },
    {
      "slug": "gpt-5-medium",
      "canonicalModelKey": "gpt-5-medium",
      "model": "GPT-5 (medium)",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 70,
      "rankingEligible": true,
      "overallRank": 37,
      "url": "https://benchlm.ai/models/gpt-5-medium",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
      "id": 16,
      "releaseDate": "2025-08-07",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5",
        "familyName": "GPT-5",
        "variantType": "reasoning",
        "snapshotLabel": "medium",
        "baseFamilyModelKey": "gpt-5-high",
        "relatedModelKeys": [
          "gpt-5-high",
          "gpt-5-mini",
          "gpt-5-nano"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 70,
        "overallScore": 84,
        "rawOverallScore": 84,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 68.1,
          "coding": 77.7,
          "reasoning": 74.1,
          "multimodalGrounded": 90.6,
          "knowledge": 73.3,
          "multilingual": 83.8,
          "instructionFollowing": 76.9,
          "math": 91.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 37,
        "categoryRanks": {
          "agentic": 24,
          "coding": 27,
          "reasoning": 21,
          "multimodalGrounded": 8,
          "knowledge": 33,
          "multilingual": 14,
          "instructionFollowing": 44,
          "math": 12
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 45.83,
          "tau2Bench": 86.5,
          "gdpvalAaNormalized": 25.1,
          "gdpvalAa": 1001
        },
        "coding": {
          "aaCodingIndex": 38.95,
          "terminalBenchHard": 37.9,
          "aaSciCode": 41.1
        },
        "reasoning": {
          "lcr": 72.8,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 74.3,
          "designArenaWebsite": 1230
        },
        "knowledge": {
          "artificialAnalysis": 42.03,
          "aaGpqaDiamond": 84.2,
          "aaHle": 23.5,
          "aaOmniscienceIndex": -10.1,
          "omniscienceAccuracy": 38.9,
          "omniscienceHallucinationRate": 80.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 70.6
        },
        "math": {}
      }
    },
    {
      "slug": "nemotron-3-ultra",
      "canonicalModelKey": "nemotron-3-ultra-500b",
      "model": "Nemotron 3 Ultra",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 68,
      "rankingEligible": true,
      "overallRank": 38,
      "url": "https://benchlm.ai/models/nemotron-3-ultra",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-ultra.md",
      "id": 71,
      "releaseDate": "2026-06-04",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "nemotron-3-ultra-500b",
        "familyName": "Nemotron 3 Ultra",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "nemotron-3-ultra-500b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 68,
        "overallScore": 63,
        "rawOverallScore": 63,
        "verifiedDisplayScore": 65,
        "displayCategoryScores": {
          "agentic": 51,
          "coding": 80.4,
          "reasoning": 44,
          "multimodalGrounded": null,
          "knowledge": 73.8,
          "multilingual": 72.8,
          "instructionFollowing": 96,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 51.7,
          "coding": 74.2,
          "reasoning": 61.9,
          "multimodalGrounded": null,
          "knowledge": 62.6,
          "multilingual": 83,
          "instructionFollowing": 81.7,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 38,
        "categoryRanks": {
          "coding": 22,
          "knowledge": 31,
          "multilingual": 27,
          "instructionFollowing": 5
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 18,
        "verifiedBenchmarkCount": 18,
        "rankableBenchmarkCount": 18,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 56.4,
          "pinchBench": 90,
          "browseComp": 44.4,
          "tau3Bench": 70.9,
          "gdpvalAaNormalized": 44,
          "hleWithTools": 37.4,
          "aaAgenticIndex": 57.06,
          "tau2Bench": 83.3,
          "gdpvalAa": 1379
        },
        "coding": {
          "sweVerified": 71.9,
          "sweMultilingual": 67.7,
          "liveCodeBench": 89,
          "sciCode": 44.6,
          "terminalBench2": 56.4,
          "aaCodingIndex": 37.55,
          "terminalBenchHard": 36.4,
          "aaSciCode": 39.9
        },
        "reasoning": {
          "lcr": 67,
          "critpt": 3.1,
          "longBenchV2": 61.9
        },
        "multimodalGrounded": {},
        "knowledge": {
          "gpqa": 87,
          "gpqaDiamond": 87,
          "hle": 26.7,
          "hleNoTools": 26.7,
          "mmluPro": 86.8,
          "omniscienceAccuracy": 21.6,
          "artificialAnalysis": 47.67,
          "aaGpqaDiamond": 86.7,
          "aaHle": 26.6,
          "aaOmniscienceIndex": -0.8,
          "omniscienceHallucinationRate": 28.5
        },
        "multilingual": {
          "mmluProX": 83
        },
        "instructionFollowing": {
          "ifBench": 81.7,
          "aaIfBench": 81.4
        },
        "math": {}
      }
    },
    {
      "slug": "deepseek-v4-pro",
      "canonicalModelKey": "deepseek-v4-pro",
      "model": "DeepSeek V4 Pro",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 68,
      "rankingEligible": true,
      "overallRank": 39,
      "url": "https://benchlm.ai/models/deepseek-v4-pro",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro.md",
      "id": 104,
      "releaseDate": "2026-04-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v4",
        "familyName": "DeepSeek V4",
        "variantType": "pro",
        "snapshotLabel": "non-think",
        "baseFamilyModelKey": "deepseek-v4-pro-max",
        "relatedModelKeys": [
          "deepseek-v4-flash-base",
          "deepseek-v4-pro-base",
          "deepseek-v4-flash",
          "deepseek-v4-flash-high",
          "deepseek-v4-flash-max",
          "deepseek-v4-pro-high",
          "deepseek-v4-pro-max"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 68,
        "overallScore": 60,
        "rawOverallScore": 60,
        "verifiedDisplayScore": 57,
        "displayCategoryScores": {
          "agentic": 66.5,
          "coding": 68.4,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 50.3,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 59.1,
          "coding": 58.8,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 49.4,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 39,
        "categoryRanks": {
          "coding": 43,
          "knowledge": 60
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 21,
        "verifiedBenchmarkCount": 21,
        "rankableBenchmarkCount": 22,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 59.1,
          "mcpAtlas": 69.4,
          "toolathlon": 46.3,
          "clawEval": 59.8,
          "gertLabs": 50.28
        },
        "coding": {
          "liveCodeBench": 56.8,
          "sweVerified": 73.6,
          "swePro": 52.1,
          "sweMultilingual": 69.8,
          "terminalBench2": 59.1
        },
        "reasoning": {
          "mrcr1m": 44.7,
          "corpusQa1m": 35.6
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1286
        },
        "knowledge": {
          "mmluPro": 82.9,
          "simpleQa": 45,
          "chineseSimpleQa": 75.8,
          "gpqa": 72.9,
          "gpqaDiamond": 72.9,
          "hle": 7.7
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {
          "hmmtFeb2026": 31.7,
          "imoAnswerBench": 35.3,
          "apex": 0.4,
          "apexShortlist": 9.2
        }
      }
    },
    {
      "slug": "glm-4-7",
      "canonicalModelKey": "glm-4-7",
      "model": "GLM-4.7",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 68,
      "rankingEligible": true,
      "overallRank": 40,
      "url": "https://benchlm.ai/models/glm-4-7",
      "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
      "id": 48,
      "releaseDate": "2025-10-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-4-7",
        "familyName": "GLM-4.7",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-4-7",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 68,
        "overallScore": 66,
        "rawOverallScore": 66,
        "verifiedDisplayScore": 50,
        "displayCategoryScores": {
          "agentic": 53.5,
          "coding": 72.5,
          "reasoning": 72.9,
          "multimodalGrounded": 58.2,
          "knowledge": 66.3,
          "multilingual": 71.3,
          "instructionFollowing": 72.9,
          "math": 78.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 45.3,
          "coding": 70.6,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 24.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 40,
        "categoryRanks": {
          "agentic": 41,
          "coding": 36,
          "reasoning": 22,
          "multimodalGrounded": 58,
          "knowledge": 42,
          "multilingual": 30,
          "instructionFollowing": 50,
          "math": 25
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 7,
        "verifiedBenchmarkCount": 7,
        "rankableBenchmarkCount": 35,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 41,
          "browseComp": 52,
          "vitaBench": 15.5,
          "aaAgenticIndex": 55.01,
          "tau2Bench": 95.9,
          "gdpvalAaNormalized": 34.1,
          "gdpvalAa": 1183,
          "gertLabs": 39.95
        },
        "coding": {
          "sweVerified": 73.8,
          "liveCodeBench": 84.9,
          "sweRebench": 58.7,
          "aaCodingIndex": 36.26,
          "terminalBenchHard": 31.8,
          "aaSciCode": 45.1
        },
        "reasoning": {
          "lcr": 64,
          "critpt": 1.7
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1272
        },
        "knowledge": {
          "gpqa": 85.7,
          "mmluPro": 84.3,
          "hle": 24.8,
          "artificialAnalysis": 42.11,
          "aaGpqaDiamond": 85.9,
          "aaHle": 25.1,
          "aaOmniscienceIndex": -34.6,
          "omniscienceAccuracy": 29.3,
          "omniscienceHallucinationRate": 90.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 67.9
        },
        "math": {
          "aime2025": 95.7
        }
      }
    },
    {
      "slug": "grok-4-1-fast",
      "canonicalModelKey": "grok-4-1-fast",
      "model": "Grok 4.1 Fast",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 68,
      "rankingEligible": true,
      "overallRank": 41,
      "url": "https://benchlm.ai/models/grok-4-1-fast",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
      "id": 21,
      "releaseDate": "2025-11-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-4-1-fast",
        "familyName": "Grok 4.1 Fast",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-4-1-fast",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 68,
        "overallScore": 80,
        "rawOverallScore": 80,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 61.4,
          "coding": 59.8,
          "reasoning": 87.9,
          "multimodalGrounded": 89.7,
          "knowledge": 75.1,
          "multilingual": 74.6,
          "instructionFollowing": 79.7,
          "math": 93.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 41,
        "categoryRanks": {
          "agentic": 28,
          "coding": 49,
          "reasoning": 9,
          "multimodalGrounded": 10,
          "knowledge": 29,
          "multilingual": 26,
          "instructionFollowing": 38,
          "math": 9
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 36,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 32.95,
          "tau2Bench": 63.7,
          "gdpvalAaNormalized": 14.1,
          "gdpvalAa": 781,
          "gertLabs": 47.32
        },
        "coding": {
          "aaCodingIndex": 19.47,
          "terminalBenchHard": 14.4,
          "aaSciCode": 29.6
        },
        "reasoning": {
          "lcr": 22,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 48.4
        },
        "knowledge": {
          "artificialAnalysis": 23.56,
          "aaGpqaDiamond": 63.7,
          "aaHle": 5,
          "aaOmniscienceIndex": -50.9,
          "omniscienceAccuracy": 17,
          "omniscienceHallucinationRate": 81.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 36.5
        },
        "math": {}
      }
    },
    {
      "slug": "glm-5",
      "canonicalModelKey": "glm-5",
      "model": "GLM-5",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 67,
      "rankingEligible": true,
      "overallRank": 42,
      "url": "https://benchlm.ai/models/glm-5",
      "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
      "id": 51,
      "releaseDate": "2026-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-5",
        "familyName": "GLM-5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-5",
        "relatedModelKeys": [
          "glm-5-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 67,
        "overallScore": 67,
        "rawOverallScore": 67,
        "verifiedDisplayScore": 65,
        "displayCategoryScores": {
          "agentic": 48.4,
          "coding": 76.2,
          "reasoning": 60.5,
          "multimodalGrounded": 56,
          "knowledge": 82.2,
          "multilingual": 70.9,
          "instructionFollowing": 84.3,
          "math": 91.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 56.2,
          "coding": 63.2,
          "reasoning": 60.8,
          "multimodalGrounded": null,
          "knowledge": 70.7,
          "multilingual": 83.1,
          "instructionFollowing": 92.6,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 42,
        "categoryRanks": {
          "agentic": 48,
          "coding": 31,
          "reasoning": 33,
          "multimodalGrounded": 61,
          "knowledge": 14,
          "multilingual": 31,
          "instructionFollowing": 28,
          "math": 13
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 33,
        "verifiedBenchmarkCount": 33,
        "rankableBenchmarkCount": 73,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 4
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 56.2,
          "clawEval": 57.7,
          "qwenClawBench": 54.1,
          "tau3Bench": 65.6,
          "deepPlanning": 14.6,
          "toolathlon": 38,
          "mcpAtlas": 31.1,
          "mcpTasks": 60.8,
          "wideResearch": 69.8,
          "tau2Bench": 98.2,
          "cyberGym": 43.2,
          "aaAgenticIndex": 63.14,
          "apexAgentsAa": 14.5,
          "gdpvalAaNormalized": 44.6,
          "gdpvalAa": 1391,
          "gertLabs": 50.99
        },
        "coding": {
          "sweVerified": 77.8,
          "sweVerifiedArcee": 72.8,
          "swePro": 55.1,
          "sweMultilingual": 73.3,
          "sweRebench": 62.8,
          "reactNativeEvals": 74.8,
          "aaCodingIndex": 44.18,
          "terminalBenchHard": 43.2,
          "aaSciCode": 46.2
        },
        "reasoning": {
          "longBenchV2": 60.8,
          "aiNeedle": 63.3,
          "lcr": 63.3,
          "critpt": 2
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1292
        },
        "knowledge": {
          "gpqa": 86,
          "gpqaDiamond": 86,
          "superGpqa": 66.8,
          "mmluPro": 85.7,
          "mmluProArcee": 85.8,
          "hle": 50.4,
          "artificialAnalysis": 49.77,
          "aaGpqaDiamond": 82,
          "aaHle": 27.2,
          "aaOmniscienceIndex": 2,
          "omniscienceAccuracy": 26.9,
          "omniscienceHallucinationRate": 34
        },
        "multilingual": {
          "mmluProX": 83.1,
          "nova63": 55.1
        },
        "instructionFollowing": {
          "ifeval": 92.6,
          "aaIfBench": 72.3
        },
        "math": {
          "aime2026": 95.8,
          "aime2025Arcee": 93.3,
          "hmmtFeb2025": 97.5,
          "hmmtNov2025": 96.9,
          "hmmtFeb2026": 86.4,
          "mmAnswerBench": 82.5
        }
      }
    },
    {
      "slug": "qwen3-6-plus",
      "canonicalModelKey": "qwen3-6-plus",
      "model": "Qwen3.6 Plus",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 66,
      "rankingEligible": true,
      "overallRank": 43,
      "url": "https://benchlm.ai/models/qwen3-6-plus",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-plus.md",
      "id": 55,
      "releaseDate": "2026-04-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-6-plus",
        "familyName": "Qwen3.6 Plus",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-6-plus",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 66,
        "overallScore": 66,
        "rawOverallScore": 66,
        "verifiedDisplayScore": 68,
        "displayCategoryScores": {
          "agentic": 55.9,
          "coding": 77.7,
          "reasoning": 44.2,
          "multimodalGrounded": 71.6,
          "knowledge": 75.7,
          "multilingual": 77.6,
          "instructionFollowing": 91.8,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 61.6,
          "coding": 64.8,
          "reasoning": 62,
          "multimodalGrounded": 79.6,
          "knowledge": 66,
          "multilingual": 84.7,
          "instructionFollowing": 87.8,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 43,
        "categoryRanks": {
          "agentic": 36,
          "coding": 28,
          "multimodalGrounded": 28,
          "knowledge": 27,
          "multilingual": 25,
          "instructionFollowing": 15
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 38,
        "verifiedBenchmarkCount": 38,
        "rankableBenchmarkCount": 69,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 4
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 61.6,
          "clawEval": 58.8,
          "qwenClawBench": 57.2,
          "tau3Bench": 70.7,
          "vitaBench": 44.3,
          "deepPlanning": 41.5,
          "toolathlon": 39.8,
          "mcpAtlas": 48.2,
          "mcpTasks": 74.1,
          "wideResearch": 74.3,
          "aaAgenticIndex": 61.67,
          "tau2Bench": 97.7,
          "gdpvalAaNormalized": 42.5,
          "gdpvalAa": 1350,
          "gertLabs": 50.6
        },
        "coding": {
          "sweVerified": 78.8,
          "swePro": 56.6,
          "sweMultilingual": 73.8,
          "liveCodeBenchV6": 87.1,
          "vibeCodeBench": 25.564,
          "aaCodingIndex": 42.87,
          "terminalBenchHard": 43.9,
          "aaSciCode": 40.7
        },
        "reasoning": {
          "aiNeedle": 68.3,
          "longBenchV2": 62,
          "lcr": 69.7,
          "critpt": 2.9
        },
        "multimodalGrounded": {
          "mmmu": 86,
          "mmmuPro": 78.8,
          "mathVision": 88,
          "videoMmmu": 84,
          "screenSpotPro": 68.2,
          "charxiv": 81.5,
          "vStar": 96.9,
          "aaMmmuPro": 78,
          "designArenaWebsite": 1264
        },
        "knowledge": {
          "gpqa": 90.4,
          "superGpqa": 71.6,
          "mmluPro": 88.5,
          "mmluRedux": 94.5,
          "cEval": 93.3,
          "hle": 28.8,
          "artificialAnalysis": 49.98,
          "aaGpqaDiamond": 88.2,
          "aaHle": 25.7,
          "aaOmniscienceIndex": 2.7,
          "omniscienceAccuracy": 26.2,
          "omniscienceHallucinationRate": 32
        },
        "multilingual": {
          "mmluProX": 84.7,
          "nova63": 57.9
        },
        "instructionFollowing": {
          "ifeval": 94.3,
          "ifBench": 75.8,
          "aaIfBench": 75.2
        },
        "math": {
          "aime2026": 95.3,
          "hmmtFeb2025": 96.7,
          "hmmtNov2025": 94.6,
          "hmmtFeb2026": 87.8,
          "mmAnswerBench": 83.8
        }
      }
    },
    {
      "slug": "mai-thinking-1",
      "canonicalModelKey": "mai-thinking-1",
      "model": "MAI-Thinking-1",
      "creator": "Microsoft",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 65,
      "rankingEligible": true,
      "overallRank": 44,
      "url": "https://benchlm.ai/models/mai-thinking-1",
      "markdownUrl": "https://benchlm.ai/md/models/mai-thinking-1.md",
      "id": 250,
      "releaseDate": "2026-06-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mai-thinking",
        "familyName": "MAI-Thinking",
        "variantType": "1",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mai-thinking-1",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 65,
        "overallScore": 65,
        "rawOverallScore": 65,
        "verifiedDisplayScore": 65,
        "displayCategoryScores": {
          "agentic": 31.9,
          "coding": 84.5,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 64.6,
          "multilingual": null,
          "instructionFollowing": 100,
          "math": 98.4
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 46,
          "coding": 71,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 69.9,
          "multilingual": null,
          "instructionFollowing": 85,
          "math": 97
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 44,
        "categoryRanks": {
          "coding": 15,
          "knowledge": 47,
          "instructionFollowing": 1
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 14,
        "verifiedBenchmarkCount": 14,
        "rankableBenchmarkCount": 14,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 46
        },
        "coding": {
          "liveCodeBench": 87.7,
          "sweVerified": 73.5,
          "swePro": 52.8,
          "terminalBench2": 46
        },
        "reasoning": {
          "graphwalksBfs128k": 90
        },
        "multimodalGrounded": {},
        "knowledge": {
          "gpqa": 84.2,
          "gpqaDiamond": 84.2,
          "mmluPro": 85,
          "simpleQa": 31
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifBench": 85
        },
        "math": {
          "aime2025": 97,
          "aime2026": 94.5,
          "hmmtFeb2026": 84.9
        }
      }
    },
    {
      "slug": "qwen3-6-35b-a3b",
      "canonicalModelKey": "qwen3-6-35b-a3b",
      "model": "Qwen3.6-35B-A3B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 65,
      "rankingEligible": true,
      "overallRank": 45,
      "url": "https://benchlm.ai/models/qwen3-6-35b-a3b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-35b-a3b.md",
      "id": 76,
      "releaseDate": "2026-04-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-6-35b-a3b",
        "familyName": "Qwen3.6-35B-A3B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-6-35b-a3b",
        "relatedModelKeys": [
          "qwen3-6-plus",
          "qwen3-5-35b-a3b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 65,
        "overallScore": 62,
        "rawOverallScore": 62,
        "verifiedDisplayScore": 62,
        "displayCategoryScores": {
          "agentic": 53.7,
          "coding": 72.2,
          "reasoning": null,
          "multimodalGrounded": 64,
          "knowledge": 65.5,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 51.5,
          "coding": 66.9,
          "reasoning": null,
          "multimodalGrounded": 76.1,
          "knowledge": 60.5,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 45,
        "categoryRanks": {
          "coding": 38,
          "multimodalGrounded": 45,
          "knowledge": 43
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 40,
        "verifiedBenchmarkCount": 40,
        "rankableBenchmarkCount": 40,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 51.5,
          "clawEval": 68.7,
          "qwenClawBench": 52.6,
          "qwenWebBench": 1397,
          "tau3Bench": 67.2,
          "vitaBench": 35.6,
          "deepPlanning": 25.9,
          "toolathlon": 26.9,
          "mcpAtlas": 62.8,
          "wideResearch": 60.1,
          "aaAgenticIndex": 58.34,
          "tau2Bench": 95.3,
          "gdpvalAaNormalized": 39.9,
          "gdpvalAa": 1298,
          "gertLabs": 42.65
        },
        "coding": {
          "sweVerified": 73.4,
          "sweMultilingual": 67.2,
          "swePro": 49.5,
          "terminalBench2": 51.5,
          "liveCodeBench": 80.4,
          "nl2Repo": 29.4,
          "aaCodingIndex": 35.15,
          "terminalBenchHard": 34.8,
          "aaSciCode": 35.8
        },
        "reasoning": {
          "lcr": 63.7,
          "critpt": 0.3
        },
        "multimodalGrounded": {
          "mmmu": 81.7,
          "mmmuPro": 75.3,
          "realWorldQa": 85.3,
          "omniDocBench15": 89.9,
          "charxiv": 78,
          "simpleVqa": 58.9,
          "ccOcr": 81.9,
          "ai2dTest": 92.7,
          "refcocoAvg": 92,
          "odinw13": 50.8,
          "videoMmeWithSub": 86.6,
          "videoMmeNoSub": 82.5,
          "videoMmmu": 83.7,
          "mlvuAvg": 86.2,
          "aaMmmuPro": 75
        },
        "knowledge": {
          "mmluPro": 85.2,
          "superGpqa": 64.7,
          "cEval": 90,
          "gpqa": 86,
          "hle": 21.4,
          "artificialAnalysis": 43.49,
          "aaGpqaDiamond": 84.1,
          "aaHle": 20.2,
          "aaOmniscienceIndex": -21.4,
          "omniscienceAccuracy": 18.9,
          "omniscienceHallucinationRate": 49.7
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 64.4
        },
        "math": {
          "hmmtFeb2025": 90.7,
          "hmmtNov2025": 89.1,
          "hmmtFeb2026": 83.6,
          "mmAnswerBench": 78.9,
          "aime2026": 92.7
        }
      }
    },
    {
      "slug": "claude-sonnet-4-5",
      "canonicalModelKey": "claude-sonnet-4-5",
      "model": "Claude Sonnet 4.5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 64,
      "rankingEligible": true,
      "overallRank": 46,
      "url": "https://benchlm.ai/models/claude-sonnet-4-5",
      "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
      "id": 31,
      "releaseDate": "2025-09-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-sonnet-4-5",
        "familyName": "Claude Sonnet 4.5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-sonnet-4-5",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 64,
        "overallScore": 75,
        "rawOverallScore": 75,
        "verifiedDisplayScore": 56,
        "displayCategoryScores": {
          "agentic": 54,
          "coding": 78.4,
          "reasoning": 62,
          "multimodalGrounded": 94.8,
          "knowledge": 73.7,
          "multilingual": 84.8,
          "instructionFollowing": 84.2,
          "math": 87.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 55.3,
          "coding": 77.2,
          "reasoning": 13.6,
          "multimodalGrounded": null,
          "knowledge": 83.4,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 46,
        "categoryRanks": {
          "agentic": 40,
          "coding": 25,
          "reasoning": 32,
          "multimodalGrounded": 6,
          "knowledge": 32,
          "multilingual": 12,
          "instructionFollowing": 29,
          "math": 16
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 6,
        "verifiedBenchmarkCount": 6,
        "rankableBenchmarkCount": 38,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 50,
          "osWorldVerified": 61.4,
          "vitaBench": 17,
          "gertLabs": 48.51
        },
        "coding": {
          "sweVerified": 77.2
        },
        "reasoning": {
          "arcAgi2": 13.6
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1235
        },
        "knowledge": {
          "gpqa": 83.4
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {
          "aime2025": 87
        }
      }
    },
    {
      "slug": "kimi-k2-5",
      "canonicalModelKey": "kimi-k2-5",
      "model": "Kimi K2.5",
      "creator": "Moonshot AI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 63,
      "rankingEligible": true,
      "overallRank": 47,
      "url": "https://benchlm.ai/models/kimi-k2-5",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
      "id": 53,
      "releaseDate": "2026-02-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "kimi-k2-5",
        "familyName": "Kimi K2.5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "kimi-k2-5",
        "relatedModelKeys": [
          "kimi-k2-5-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 63,
        "overallScore": 63,
        "rawOverallScore": 63,
        "verifiedDisplayScore": 68,
        "displayCategoryScores": {
          "agentic": 45,
          "coding": 81,
          "reasoning": 54.6,
          "multimodalGrounded": 63.1,
          "knowledge": 70.6,
          "multilingual": 68.4,
          "instructionFollowing": 84.5,
          "math": 56.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 54.6,
          "coding": 64.2,
          "reasoning": 61,
          "multimodalGrounded": 78.5,
          "knowledge": 65.1,
          "multilingual": 82.3,
          "instructionFollowing": 93.9,
          "math": 96.1
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 47,
        "categoryRanks": {
          "agentic": 50,
          "coding": 21,
          "reasoning": 49,
          "multimodalGrounded": 48,
          "knowledge": 37,
          "multilingual": 35,
          "instructionFollowing": 27,
          "math": 43
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 39,
        "verifiedBenchmarkCount": 39,
        "rankableBenchmarkCount": 108,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 4
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 50.8,
          "browseComp": 60.6,
          "clawEval": 52.3,
          "qwenClawBench": 54.3,
          "tau3Bench": 65.7,
          "deepSearchQa": 77.1,
          "deepPlanning": 14.4,
          "toolathlon": 27.8,
          "mcpAtlas": 29.5,
          "mcpTasks": 59.1,
          "wideResearch": 72.7,
          "tau2Bench": 95.9,
          "aaAgenticIndex": 58.94,
          "apexAgentsAa": 11.5,
          "gdpvalAaNormalized": 39.2,
          "gdpvalAa": 1284,
          "gertLabs": 45.88
        },
        "coding": {
          "sweVerified": 76.8,
          "sweVerifiedArcee": 70.8,
          "liveCodeBench": 85,
          "liveCodeBenchV6": 85,
          "swePro": 50.7,
          "sweMultilingual": 73,
          "sweRebench": 58.5,
          "reactNativeEvals": 77.2,
          "sciCode": 48.7,
          "aaCodingIndex": 39.55,
          "terminalBenchHard": 34.8,
          "aaSciCode": 49
        },
        "reasoning": {
          "longBenchV2": 61,
          "lcr": 65.3,
          "critpt": 3.1
        },
        "multimodalGrounded": {
          "mmmuPro": 78.5,
          "videoMme": 87.4,
          "mmvu": 80.4,
          "videoMmmu": 86.6,
          "aaMmmuPro": 75.4,
          "designArenaWebsite": 1294
        },
        "knowledge": {
          "gpqa": 87.6,
          "gpqaDiamond": 87.6,
          "superGpqa": 69.2,
          "mmluPro": 87.1,
          "mmluProArcee": 87.1,
          "hle": 30.1,
          "artificialAnalysis": 46.81,
          "aaGpqaDiamond": 87.9,
          "aaHle": 29.4,
          "aaOmniscienceIndex": -8.1,
          "omniscienceAccuracy": 34.3,
          "omniscienceHallucinationRate": 64.6
        },
        "multilingual": {
          "mmluProX": 82.3,
          "nova63": 56
        },
        "instructionFollowing": {
          "ifeval": 93.9,
          "aaIfBench": 70.2
        },
        "math": {
          "aime2025": 96.1,
          "aime2026": 95.8,
          "aime2025Arcee": 96.3,
          "hmmtFeb2025": 95.4,
          "hmmtNov2025": 91.1,
          "hmmtFeb2026": 87.1,
          "mmAnswerBench": 81.8
        }
      }
    },
    {
      "slug": "qwen3-5-122b-a10b",
      "canonicalModelKey": "qwen3-5-122b-a10b",
      "model": "Qwen3.5-122B-A10B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 63,
      "rankingEligible": true,
      "overallRank": 48,
      "url": "https://benchlm.ai/models/qwen3-5-122b-a10b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-122b-a10b.md",
      "id": 37,
      "releaseDate": "2026-03-04",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-5-122b-a10b",
        "familyName": "Qwen3.5-122B-A10B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-5-122b-a10b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 63,
        "overallScore": 66,
        "rawOverallScore": 66,
        "verifiedDisplayScore": 70,
        "displayCategoryScores": {
          "agentic": 55.4,
          "coding": 70.7,
          "reasoning": 39,
          "multimodalGrounded": 65.2,
          "knowledge": 80.2,
          "multilingual": 70.6,
          "instructionFollowing": 87.8,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 56.1,
          "coding": 72,
          "reasoning": 60.2,
          "multimodalGrounded": 77.2,
          "knowledge": 81.6,
          "multilingual": 82.2,
          "instructionFollowing": 93.4,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 48,
        "categoryRanks": {
          "agentic": 37,
          "multimodalGrounded": 40,
          "knowledge": 18,
          "multilingual": 32,
          "instructionFollowing": 22
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 13,
        "verifiedBenchmarkCount": 13,
        "rankableBenchmarkCount": 19,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 49.4,
          "browseComp": 63.8,
          "osWorldVerified": 58,
          "tau2Bench": 93.6,
          "aaAgenticIndex": 53,
          "gdpvalAaNormalized": 30.7,
          "gdpvalAa": 1115
        },
        "coding": {
          "sweVerified": 72,
          "aaCodingIndex": 34.71,
          "terminalBenchHard": 31.1,
          "aaSciCode": 42
        },
        "reasoning": {
          "longBenchV2": 60.2,
          "lcr": 66.7,
          "critpt": 0.6
        },
        "multimodalGrounded": {
          "mmmu": 83.9,
          "mmvu": 74.7,
          "mathVision": 86.2,
          "charxiv": 77.2,
          "vStar": 93.2,
          "aaMmmuPro": 75
        },
        "knowledge": {
          "mmluPro": 86.7,
          "superGpqa": 67.1,
          "gpqa": 86.6,
          "artificialAnalysis": 41.6,
          "aaGpqaDiamond": 85.7,
          "aaHle": 23.4,
          "aaOmniscienceIndex": -39.6,
          "omniscienceAccuracy": 24.7,
          "omniscienceHallucinationRate": 85.5
        },
        "multilingual": {
          "mmluProX": 82.2
        },
        "instructionFollowing": {
          "ifeval": 93.4,
          "aaIfBench": 75.7
        },
        "math": {}
      }
    },
    {
      "slug": "gemini-2-5-pro",
      "canonicalModelKey": "gemini-2-5-pro",
      "model": "Gemini 2.5 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 63,
      "rankingEligible": true,
      "overallRank": 49,
      "url": "https://benchlm.ai/models/gemini-2-5-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
      "id": 68,
      "releaseDate": "2025-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-2-5-pro",
        "familyName": "Gemini 2.5 Pro",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-2-5-pro",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 63,
        "overallScore": 63,
        "rawOverallScore": 63,
        "verifiedDisplayScore": 47,
        "displayCategoryScores": {
          "agentic": 57.5,
          "coding": 48.5,
          "reasoning": 59,
          "multimodalGrounded": 85.2,
          "knowledge": 64.9,
          "multilingual": 68.9,
          "instructionFollowing": 59,
          "math": 73.5
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 63.8,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 18.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 49,
        "categoryRanks": {
          "agentic": 32,
          "coding": 61,
          "reasoning": 37,
          "multimodalGrounded": 13,
          "knowledge": 46,
          "multilingual": 34,
          "instructionFollowing": 71,
          "math": 30
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 3,
        "verifiedBenchmarkCount": 3,
        "rankableBenchmarkCount": 38,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 32.68,
          "tau2Bench": 54.1,
          "gdpvalAaNormalized": 20.9,
          "gdpvalAa": 918,
          "gertLabs": 42.01
        },
        "coding": {
          "sweVerified": 63.8,
          "vibeCodeBench": 0.4,
          "aaCodingIndex": 31.95,
          "terminalBenchHard": 26.5,
          "aaSciCode": 42.8
        },
        "reasoning": {
          "lcr": 66,
          "critpt": 2.6
        },
        "multimodalGrounded": {
          "aaMmmuPro": 74.9,
          "designArenaWebsite": 1212
        },
        "knowledge": {
          "gpqa": 83,
          "hle": 18.8,
          "artificialAnalysis": 34.63,
          "aaGpqaDiamond": 84.4,
          "aaHle": 21.1,
          "aaOmniscienceIndex": -14.3,
          "omniscienceAccuracy": 39,
          "omniscienceHallucinationRate": 87.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 48.7
        },
        "math": {}
      }
    },
    {
      "slug": "grok-4",
      "canonicalModelKey": "grok-4",
      "model": "Grok 4",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 63,
      "rankingEligible": true,
      "overallRank": 50,
      "url": "https://benchlm.ai/models/grok-4",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
      "id": 56,
      "releaseDate": "2025-07-09",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-4",
        "familyName": "Grok 4",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-4",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 63,
        "overallScore": 64,
        "rawOverallScore": 64,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 53.1,
          "coding": 76,
          "reasoning": 55.2,
          "multimodalGrounded": 73,
          "knowledge": 65.2,
          "multilingual": 63.4,
          "instructionFollowing": 54.9,
          "math": 80
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 50,
        "categoryRanks": {
          "agentic": 44,
          "coding": 33,
          "reasoning": 45,
          "multimodalGrounded": 25,
          "knowledge": 45,
          "multilingual": 45,
          "instructionFollowing": 77,
          "math": 24
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 41.5,
          "tau2Bench": 74.9,
          "gdpvalAaNormalized": 24.6,
          "gdpvalAa": 991,
          "gertLabs": 42.34
        },
        "coding": {
          "reactNativeEvals": 72.6,
          "aaCodingIndex": 40.49,
          "terminalBenchHard": 37.9,
          "aaSciCode": 45.7
        },
        "reasoning": {
          "lcr": 68,
          "critpt": 2
        },
        "multimodalGrounded": {
          "aaMmmuPro": 68.8
        },
        "knowledge": {
          "artificialAnalysis": 41.52,
          "aaGpqaDiamond": 87.7,
          "aaHle": 23.9,
          "aaOmniscienceIndex": 3.8,
          "omniscienceAccuracy": 41.4,
          "omniscienceHallucinationRate": 64.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 53.7
        },
        "math": {}
      }
    },
    {
      "slug": "qwen3-5-397b",
      "canonicalModelKey": "qwen3-5-397b",
      "model": "Qwen3.5 397B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 62,
      "rankingEligible": true,
      "overallRank": 51,
      "url": "https://benchlm.ai/models/qwen3-5-397b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
      "id": 57,
      "releaseDate": "2026-02-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-5-397b",
        "familyName": "Qwen3.5 397B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-5-397b",
        "relatedModelKeys": [
          "qwen3-5-397b-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 62,
        "overallScore": 63,
        "rawOverallScore": 63,
        "verifiedDisplayScore": 66,
        "displayCategoryScores": {
          "agentic": 53.5,
          "coding": 67.5,
          "reasoning": 58.4,
          "multimodalGrounded": 65.1,
          "knowledge": 71.4,
          "multilingual": 71.8,
          "instructionFollowing": 84.8,
          "math": 73.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 56.2,
          "coding": 60.3,
          "reasoning": 63.2,
          "multimodalGrounded": 79.6,
          "knowledge": 65.2,
          "multilingual": 84.7,
          "instructionFollowing": 92.6,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 51,
        "categoryRanks": {
          "agentic": 42,
          "coding": 44,
          "reasoning": 40,
          "multimodalGrounded": 41,
          "knowledge": 35,
          "multilingual": 28,
          "instructionFollowing": 26,
          "math": 31
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 36,
        "verifiedBenchmarkCount": 36,
        "rankableBenchmarkCount": 86,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 4
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 52.5,
          "browseComp": 62,
          "clawEval": 56.8,
          "qwenClawBench": 51.8,
          "tau3Bench": 68.4,
          "vitaBench": 43.7,
          "deepPlanning": 37.6,
          "toolathlon": 36.3,
          "mcpAtlas": 46.1,
          "mcpTasks": 74.2,
          "wideResearch": 74,
          "aaAgenticIndex": 53.32,
          "tau2Bench": 83.9,
          "gdpvalAaNormalized": 35.8,
          "gdpvalAa": 1217,
          "gertLabs": 46.76
        },
        "coding": {
          "sweVerified": 76.2,
          "liveCodeBenchV6": 83.6,
          "swePro": 50.9,
          "aaCodingIndex": 37.43,
          "terminalBenchHard": 35.6,
          "aaSciCode": 41.1
        },
        "reasoning": {
          "longBenchV2": 63.2,
          "aiNeedle": 68.7,
          "lcr": 58,
          "critpt": 0.9
        },
        "multimodalGrounded": {
          "mmmuPro": 79,
          "mathVision": 88.6,
          "charxiv": 80.8,
          "videoMmmu": 84.7,
          "screenSpotPro": 65.6,
          "vStar": 95.8,
          "aaMmmuPro": 52.7
        },
        "knowledge": {
          "gpqa": 88.4,
          "superGpqa": 70.4,
          "mmluPro": 87.8,
          "mmluRedux": 94.9,
          "cEval": 93,
          "hle": 28.7,
          "artificialAnalysis": 40.1,
          "aaGpqaDiamond": 86.1,
          "aaHle": 18.8,
          "aaOmniscienceIndex": -36.1,
          "omniscienceAccuracy": 24.3,
          "omniscienceHallucinationRate": 79.8
        },
        "multilingual": {
          "mmluProX": 84.7,
          "nova63": 59.1
        },
        "instructionFollowing": {
          "ifeval": 92.6,
          "aaIfBench": 51.6
        },
        "math": {
          "aime2026": 93.3,
          "hmmtFeb2025": 94.8,
          "hmmtNov2025": 92.7,
          "hmmtFeb2026": 87.9,
          "mmAnswerBench": 80.9
        }
      }
    },
    {
      "slug": "qwen3-5-27b",
      "canonicalModelKey": "qwen3-5-27b",
      "model": "Qwen3.5-27B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 61,
      "rankingEligible": true,
      "overallRank": 52,
      "url": "https://benchlm.ai/models/qwen3-5-27b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-27b.md",
      "id": 47,
      "releaseDate": "2026-03-04",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-5-27b",
        "familyName": "Qwen3.5-27B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-5-27b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 61,
        "overallScore": 63,
        "rawOverallScore": 63,
        "verifiedDisplayScore": 66,
        "displayCategoryScores": {
          "agentic": 50.2,
          "coding": 70.3,
          "reasoning": 39.1,
          "multimodalGrounded": 65.1,
          "knowledge": 78,
          "multilingual": 70.6,
          "instructionFollowing": 89,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 51.6,
          "coding": 63,
          "reasoning": 60.6,
          "multimodalGrounded": null,
          "knowledge": 80.6,
          "multilingual": 82.2,
          "instructionFollowing": 95,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 52,
        "categoryRanks": {
          "agentic": 45,
          "coding": 39,
          "multimodalGrounded": 42,
          "knowledge": 24,
          "multilingual": 33,
          "instructionFollowing": 20
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 13,
        "verifiedBenchmarkCount": 13,
        "rankableBenchmarkCount": 20,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 41.6,
          "browseComp": 61,
          "osWorldVerified": 56.2,
          "tau2Bench": 93.9,
          "aaAgenticIndex": 54.61,
          "gdpvalAaNormalized": 33,
          "gdpvalAa": 1160,
          "gertLabs": 39.41
        },
        "coding": {
          "sweVerified": 72.4,
          "sweRebench": 58.9,
          "aaCodingIndex": 34.87,
          "terminalBenchHard": 32.6,
          "aaSciCode": 39.5
        },
        "reasoning": {
          "longBenchV2": 60.6,
          "lcr": 67.3,
          "critpt": 0.9
        },
        "multimodalGrounded": {
          "mmmu": 82.3,
          "mmvu": 73.3,
          "mathVision": 86,
          "vStar": 93.7,
          "aaMmmuPro": 75
        },
        "knowledge": {
          "mmluPro": 86.1,
          "superGpqa": 65.6,
          "gpqa": 85.5,
          "artificialAnalysis": 42.07,
          "aaGpqaDiamond": 85.8,
          "aaHle": 22.2,
          "aaOmniscienceIndex": -42,
          "omniscienceAccuracy": 21,
          "omniscienceHallucinationRate": 79.7
        },
        "multilingual": {
          "mmluProX": 82.2
        },
        "instructionFollowing": {
          "ifeval": 95,
          "aaIfBench": 75.6
        },
        "math": {}
      }
    },
    {
      "slug": "deepseek-v3-2-thinking",
      "canonicalModelKey": "deepseek-v3-2-thinking",
      "model": "DeepSeek V3.2 (Thinking)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 60,
      "rankingEligible": true,
      "overallRank": 53,
      "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
      "id": 72,
      "releaseDate": "2025-12-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v3-2",
        "familyName": "DeepSeek V3.2",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "deepseek-v3-2",
        "relatedModelKeys": [
          "deepseek-v3-2"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 60,
        "overallScore": 59,
        "rawOverallScore": 59,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 61.5,
          "coding": 51.5,
          "reasoning": 55,
          "multimodalGrounded": 59,
          "knowledge": 66.7,
          "multilingual": 63.4,
          "instructionFollowing": 62.1,
          "math": 51.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 53,
        "categoryRanks": {
          "agentic": 27,
          "coding": 57,
          "reasoning": 47,
          "multimodalGrounded": 53,
          "knowledge": 40,
          "multilingual": 46,
          "instructionFollowing": 67,
          "math": 50
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 42,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "vibeCodeBench": 5.108
        },
        "reasoning": {},
        "multimodalGrounded": {
          "designArenaWebsite": 1222
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "mimo-v2-flash",
      "canonicalModelKey": "mimo-v2-flash",
      "model": "MiMo-V2-Flash",
      "creator": "Xiaomi",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 59,
      "rankingEligible": true,
      "overallRank": 54,
      "url": "https://benchlm.ai/models/mimo-v2-flash",
      "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
      "id": 42,
      "releaseDate": "2026-03-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mimo-v2-flash",
        "familyName": "MiMo-V2-Flash",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mimo-v2-flash",
        "relatedModelKeys": [
          "mimo-v2-pro",
          "mimo-v2-omni"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 59,
        "overallScore": 66,
        "rawOverallScore": 66,
        "verifiedDisplayScore": 73,
        "displayCategoryScores": {
          "agentic": 57.1,
          "coding": 68.6,
          "reasoning": 55.6,
          "multimodalGrounded": 68.7,
          "knowledge": 63.3,
          "multilingual": 58.8,
          "instructionFollowing": 62.3,
          "math": 82.1
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 73.4,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 54,
        "categoryRanks": {
          "agentic": 33,
          "coding": 42,
          "reasoning": 44,
          "multimodalGrounded": 31,
          "knowledge": 48,
          "multilingual": 55,
          "instructionFollowing": 64,
          "math": 20
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 35,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 47.34,
          "tau2Bench": 83.9,
          "gdpvalAaNormalized": 28,
          "gdpvalAa": 1059
        },
        "coding": {
          "sweVerified": 73.4,
          "aaCodingIndex": 25.81,
          "terminalBenchHard": 25.8,
          "aaSciCode": 25.9
        },
        "reasoning": {
          "lcr": 31.3,
          "critpt": 0
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1212
        },
        "knowledge": {
          "gpqa": 83.7,
          "mmluPro": 84.9,
          "artificialAnalysis": 30.35,
          "aaGpqaDiamond": 65.6,
          "aaHle": 8,
          "aaOmniscienceIndex": -48.5,
          "omniscienceAccuracy": 15.2,
          "omniscienceHallucinationRate": 75.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 39.9
        },
        "math": {
          "aime2025": 94.1
        }
      }
    },
    {
      "slug": "deepseek-v4-flash",
      "canonicalModelKey": "deepseek-v4-flash",
      "model": "DeepSeek V4 Flash",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 57,
      "rankingEligible": true,
      "overallRank": 55,
      "url": "https://benchlm.ai/models/deepseek-v4-flash",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash.md",
      "id": 122,
      "releaseDate": "2026-04-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v4",
        "familyName": "DeepSeek V4",
        "variantType": "flash",
        "snapshotLabel": "non-think",
        "baseFamilyModelKey": "deepseek-v4-pro-max",
        "relatedModelKeys": [
          "deepseek-v4-flash-base",
          "deepseek-v4-pro-base",
          "deepseek-v4-flash-high",
          "deepseek-v4-flash-max",
          "deepseek-v4-pro",
          "deepseek-v4-pro-high",
          "deepseek-v4-pro-max"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 57,
        "overallScore": 49,
        "rawOverallScore": 49,
        "verifiedDisplayScore": 51,
        "displayCategoryScores": {
          "agentic": 47.7,
          "coding": 62,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 44.6,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 49.1,
          "coding": 57.1,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 45.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 55,
        "categoryRanks": {
          "coding": 46,
          "knowledge": 68
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 21,
        "verifiedBenchmarkCount": 21,
        "rankableBenchmarkCount": 22,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 49.1,
          "mcpAtlas": 64,
          "toolathlon": 40.7,
          "clawEval": 57.8,
          "gertLabs": 54.35
        },
        "coding": {
          "liveCodeBench": 55.2,
          "sweVerified": 73.7,
          "swePro": 49.1,
          "sweMultilingual": 69.7,
          "terminalBench2": 49.1
        },
        "reasoning": {
          "mrcr1m": 37.5,
          "corpusQa1m": 15.5
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1259
        },
        "knowledge": {
          "mmluPro": 83,
          "simpleQa": 23.1,
          "chineseSimpleQa": 71.5,
          "gpqa": 71.2,
          "gpqaDiamond": 71.2,
          "hle": 8.1
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {
          "hmmtFeb2026": 40.8,
          "imoAnswerBench": 41.9,
          "apex": 1,
          "apexShortlist": 9.3
        }
      }
    },
    {
      "slug": "gpt-4-1",
      "canonicalModelKey": "gpt-4-1",
      "model": "GPT-4.1",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 57,
      "rankingEligible": true,
      "overallRank": 56,
      "url": "https://benchlm.ai/models/gpt-4-1",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1.md",
      "id": 59,
      "releaseDate": "2025-04-14",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-4-1",
        "familyName": "GPT-4.1",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-4-1",
        "relatedModelKeys": [
          "gpt-4-1-mini",
          "gpt-4-1-nano"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "gpt-4o"
      },
      "scores": {
        "displayScore": 57,
        "overallScore": 64,
        "rawOverallScore": 64,
        "verifiedDisplayScore": 63,
        "displayCategoryScores": {
          "agentic": 55.4,
          "coding": 54.6,
          "reasoning": 76.4,
          "multimodalGrounded": 63.9,
          "knowledge": 51.5,
          "multilingual": 33.6,
          "instructionFollowing": 74.7,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 54.6,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 66.3,
          "multilingual": null,
          "instructionFollowing": 87.4,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 56,
        "categoryRanks": {
          "agentic": 38,
          "reasoning": 18,
          "multimodalGrounded": 46,
          "multilingual": 75,
          "instructionFollowing": 49
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 17,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 27.26,
          "tau2Bench": 47.1,
          "gdpvalAaNormalized": 13.8,
          "gdpvalAa": 777,
          "gertLabs": 25.65
        },
        "coding": {
          "sweVerified": 54.6,
          "aaCodingIndex": 21.78,
          "terminalBenchHard": 13.6,
          "aaSciCode": 38.1
        },
        "reasoning": {
          "lcr": 61,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 61.2,
          "designArenaWebsite": 1084
        },
        "knowledge": {
          "mmlu": 90.2,
          "gpqa": 66.3,
          "artificialAnalysis": 26.28,
          "aaGpqaDiamond": 66.6,
          "aaHle": 4.6,
          "aaOmniscienceIndex": -36.2,
          "omniscienceAccuracy": 24.2,
          "omniscienceHallucinationRate": 79.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 87.4,
          "aaIfBench": 43
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "o3-pro",
      "canonicalModelKey": "o3-pro",
      "model": "o3-pro",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 57,
      "rankingEligible": true,
      "overallRank": 57,
      "url": "https://benchlm.ai/models/o3-pro",
      "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
      "id": 39,
      "releaseDate": "2025-04-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "o3",
        "familyName": "o3",
        "variantType": "pro",
        "snapshotLabel": null,
        "baseFamilyModelKey": "o3",
        "relatedModelKeys": [
          "o3",
          "o3-mini"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 57,
        "overallScore": 70,
        "rawOverallScore": 70,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 61.4,
          "coding": 54.5,
          "reasoning": 70.9,
          "multimodalGrounded": 64.7,
          "knowledge": 66.6,
          "multilingual": 64.2,
          "instructionFollowing": 54.9,
          "math": 86.4
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 57,
        "categoryRanks": {
          "agentic": 29,
          "coding": 53,
          "reasoning": 23,
          "multimodalGrounded": 44,
          "knowledge": 41,
          "multilingual": 43,
          "instructionFollowing": 78,
          "math": 17
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 40.69,
          "aaGpqaDiamond": 84.5
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "o1",
      "canonicalModelKey": "o1",
      "model": "o1",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 57,
      "rankingEligible": true,
      "overallRank": 58,
      "url": "https://benchlm.ai/models/o1",
      "markdownUrl": "https://benchlm.ai/md/models/o1.md",
      "id": 58,
      "releaseDate": "2024-12-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "o1",
        "familyName": "o1",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "o1",
        "relatedModelKeys": [
          "o1-pro",
          "o1-preview"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "o1-preview"
      },
      "scores": {
        "displayScore": 57,
        "overallScore": 65,
        "rawOverallScore": 65,
        "verifiedDisplayScore": 81,
        "displayCategoryScores": {
          "agentic": 56.4,
          "coding": 48.2,
          "reasoning": 75,
          "multimodalGrounded": 58.9,
          "knowledge": 61.1,
          "multilingual": 56,
          "instructionFollowing": 92.3,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 75.7,
          "multilingual": null,
          "instructionFollowing": 92.2,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 58,
        "categoryRanks": {
          "agentic": 35,
          "reasoning": 20,
          "multimodalGrounded": 54,
          "multilingual": 59,
          "instructionFollowing": 13
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 3,
        "verifiedBenchmarkCount": 3,
        "rankableBenchmarkCount": 16,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 31.08,
          "tau2Bench": 62.6,
          "gdpvalAaNormalized": 11.5,
          "gdpvalAa": 730
        },
        "coding": {
          "aaCodingIndex": 20.51,
          "terminalBenchHard": 12.9,
          "aaSciCode": 35.8
        },
        "reasoning": {
          "lcr": 59.3,
          "critpt": 0.3
        },
        "multimodalGrounded": {},
        "knowledge": {
          "mmlu": 91.8,
          "gpqa": 75.7,
          "artificialAnalysis": 30.75,
          "aaGpqaDiamond": 74.7,
          "aaHle": 7.7,
          "aaOmniscienceIndex": -10.5,
          "omniscienceAccuracy": 34.7,
          "omniscienceHallucinationRate": 69.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 92.2,
          "aaIfBench": 70.3
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "deepseek-v3-2",
      "canonicalModelKey": "deepseek-v3-2",
      "model": "DeepSeek V3.2",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 56,
      "rankingEligible": true,
      "overallRank": 59,
      "url": "https://benchlm.ai/models/deepseek-v3-2",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
      "id": 84,
      "releaseDate": "2025-12-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v3-2",
        "familyName": "DeepSeek V3.2",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "deepseek-v3-2",
        "relatedModelKeys": [
          "deepseek-v3-2-thinking"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 56,
        "overallScore": 55,
        "rawOverallScore": 55,
        "verifiedDisplayScore": 61,
        "displayCategoryScores": {
          "agentic": 50,
          "coding": 57.3,
          "reasoning": 47.4,
          "multimodalGrounded": 50,
          "knowledge": 59.9,
          "multilingual": 67,
          "instructionFollowing": 61.7,
          "math": 70.5
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 60.9,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 59,
        "categoryRanks": {
          "agentic": 46,
          "coding": 51,
          "reasoning": 56,
          "multimodalGrounded": 69,
          "knowledge": 51,
          "multilingual": 39,
          "instructionFollowing": 68,
          "math": 34
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 42,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 40.2,
          "vitaBench": 18.5,
          "aaAgenticIndex": 39.82,
          "tau2Bench": 78.9,
          "gdpvalAaNormalized": 18.8,
          "gdpvalAa": 876,
          "gertLabs": 29.57
        },
        "coding": {
          "sweRebench": 60.9,
          "reactNativeEvals": 71.5,
          "aaCodingIndex": 34.6,
          "terminalBenchHard": 32.6,
          "aaSciCode": 38.7
        },
        "reasoning": {
          "lcr": 39,
          "critpt": 0.9
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1222
        },
        "knowledge": {
          "artificialAnalysis": 32.09,
          "aaGpqaDiamond": 75.1,
          "aaHle": 10.5,
          "aaOmniscienceIndex": -46.7,
          "omniscienceAccuracy": 24.2,
          "omniscienceHallucinationRate": 93.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 49
        },
        "math": {}
      }
    },
    {
      "slug": "claude-haiku-4-5",
      "canonicalModelKey": "claude-haiku-4-5",
      "model": "Claude Haiku 4.5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 56,
      "rankingEligible": true,
      "overallRank": 60,
      "url": "https://benchlm.ai/models/claude-haiku-4-5",
      "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
      "id": 78,
      "releaseDate": "2025-10-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-haiku-4-5",
        "familyName": "Claude Haiku 4.5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-haiku-4-5",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 56,
        "overallScore": 54,
        "rawOverallScore": 54,
        "verifiedDisplayScore": 73,
        "displayCategoryScores": {
          "agentic": 44.2,
          "coding": 53.2,
          "reasoning": 58.6,
          "multimodalGrounded": 73.6,
          "knowledge": 48.3,
          "multilingual": 61.4,
          "instructionFollowing": 69.6,
          "math": 55
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 73.3,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 60,
        "categoryRanks": {
          "agentic": 53,
          "coding": 56,
          "reasoning": 39,
          "multimodalGrounded": 24,
          "knowledge": 64,
          "multilingual": 52,
          "instructionFollowing": 53,
          "math": 44
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "sweVerified": 73.3
        },
        "reasoning": {},
        "multimodalGrounded": {
          "designArenaWebsite": 1167
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "o3",
      "canonicalModelKey": "o3",
      "model": "o3",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 56,
      "rankingEligible": true,
      "overallRank": 61,
      "url": "https://benchlm.ai/models/o3",
      "markdownUrl": "https://benchlm.ai/md/models/o3.md",
      "id": 50,
      "releaseDate": "2025-04-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "o3",
        "familyName": "o3",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "o3",
        "relatedModelKeys": [
          "o3-pro",
          "o3-mini"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 56,
        "overallScore": 68,
        "rawOverallScore": 68,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 56.5,
          "coding": 65.1,
          "reasoning": 55.2,
          "multimodalGrounded": 61.7,
          "knowledge": 65.4,
          "multilingual": 64.2,
          "instructionFollowing": 65.9,
          "math": 83.4
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 61,
        "categoryRanks": {
          "agentic": 34,
          "coding": 45,
          "reasoning": 46,
          "multimodalGrounded": 50,
          "knowledge": 44,
          "multilingual": 44,
          "instructionFollowing": 58,
          "math": 19
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 36,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 36.09,
          "tau2Bench": 80.7,
          "gdpvalAaNormalized": 12.8,
          "gdpvalAa": 757
        },
        "coding": {
          "aaCodingIndex": 38.4,
          "terminalBenchHard": 37.1,
          "aaSciCode": 41
        },
        "reasoning": {
          "lcr": 69.3,
          "critpt": 1.1
        },
        "multimodalGrounded": {
          "aaMmmuPro": 70.1,
          "designArenaWebsite": 1082
        },
        "knowledge": {
          "artificialAnalysis": 38.37,
          "aaGpqaDiamond": 82.7,
          "aaHle": 20,
          "aaOmniscienceIndex": -15.3,
          "omniscienceAccuracy": 38.4,
          "omniscienceHallucinationRate": 87.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 71.4
        },
        "math": {}
      }
    },
    {
      "slug": "qwen3-5-35b-a3b",
      "canonicalModelKey": "qwen3-5-35b-a3b",
      "model": "Qwen3.5-35B-A3B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 55,
      "rankingEligible": true,
      "overallRank": 62,
      "url": "https://benchlm.ai/models/qwen3-5-35b-a3b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-35b-a3b.md",
      "id": 60,
      "releaseDate": "2026-03-04",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-5-35b-a3b",
        "familyName": "Qwen3.5-35B-A3B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-5-35b-a3b",
        "relatedModelKeys": [
          "qwen3-5-flash"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 55,
        "overallScore": 58,
        "rawOverallScore": 58,
        "verifiedDisplayScore": 63,
        "displayCategoryScores": {
          "agentic": 44.7,
          "coding": 59.1,
          "reasoning": 35.2,
          "multimodalGrounded": 63.3,
          "knowledge": 75.3,
          "multilingual": 67.2,
          "instructionFollowing": 81.2,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 50.6,
          "coding": 58.4,
          "reasoning": 59,
          "multimodalGrounded": null,
          "knowledge": 79.3,
          "multilingual": 81,
          "instructionFollowing": 91.9,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 62,
        "categoryRanks": {
          "agentic": 51,
          "coding": 50,
          "multimodalGrounded": 47,
          "knowledge": 28,
          "multilingual": 38,
          "instructionFollowing": 34
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 13,
        "verifiedBenchmarkCount": 13,
        "rankableBenchmarkCount": 19,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 40.5,
          "browseComp": 61,
          "osWorldVerified": 54.5,
          "tau2Bench": 89.2,
          "aaAgenticIndex": 44.11,
          "gdpvalAaNormalized": 20.3,
          "gdpvalAa": 905,
          "gertLabs": 28.96
        },
        "coding": {
          "sweVerified": 69.2,
          "sweRebench": 53.7,
          "aaCodingIndex": 30.25,
          "terminalBenchHard": 26.5,
          "aaSciCode": 37.7
        },
        "reasoning": {
          "longBenchV2": 59,
          "lcr": 62.7,
          "critpt": 0.9
        },
        "multimodalGrounded": {
          "mmmu": 81.4,
          "mmvu": 72.3,
          "mathVision": 83.9,
          "vStar": 92.7,
          "aaMmmuPro": 72.7
        },
        "knowledge": {
          "mmluPro": 85.3,
          "superGpqa": 63.4,
          "gpqa": 84.2,
          "artificialAnalysis": 37.12,
          "aaGpqaDiamond": 84.5,
          "aaHle": 19.7,
          "aaOmniscienceIndex": -46.4,
          "omniscienceAccuracy": 20.5,
          "omniscienceHallucinationRate": 84
        },
        "multilingual": {
          "mmluProX": 81
        },
        "instructionFollowing": {
          "ifeval": 91.9,
          "aaIfBench": 72.5
        },
        "math": {}
      }
    },
    {
      "slug": "gemini-3-flash",
      "canonicalModelKey": "gemini-3-flash",
      "model": "Gemini 3 Flash",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 55,
      "rankingEligible": true,
      "overallRank": 63,
      "url": "https://benchlm.ai/models/gemini-3-flash",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
      "id": 77,
      "releaseDate": "2025-12-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-3-flash",
        "familyName": "Gemini 3 Flash",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-3-flash",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 55,
        "overallScore": 55,
        "rawOverallScore": 55,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 44.7,
          "coding": 47.5,
          "reasoning": 66.1,
          "multimodalGrounded": 72,
          "knowledge": 46.3,
          "multilingual": 62.5,
          "instructionFollowing": 66.9,
          "math": 52.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 63,
        "categoryRanks": {
          "agentic": 52,
          "coding": 62,
          "reasoning": 30,
          "multimodalGrounded": 27,
          "knowledge": 66,
          "multilingual": 48,
          "instructionFollowing": 57,
          "math": 48
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 2,
        "verifiedBenchmarkCount": 2,
        "rankableBenchmarkCount": 41,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 49.2,
          "aaAgenticIndex": 35.01,
          "tau2Bench": 43.3,
          "gdpvalAaNormalized": 30.7,
          "gdpvalAa": 1114,
          "gertLabs": 56.63
        },
        "coding": {
          "vibeCodeBench": 20.204,
          "aaCodingIndex": 37.84,
          "terminalBenchHard": 31.8,
          "aaSciCode": 49.9
        },
        "reasoning": {
          "lcr": 48,
          "critpt": 1.4
        },
        "multimodalGrounded": {
          "aaMmmuPro": 78.6,
          "designArenaWebsite": 1241
        },
        "knowledge": {
          "artificialAnalysis": 35.05,
          "aaGpqaDiamond": 81.2,
          "aaHle": 14.1,
          "aaOmniscienceIndex": -3.6,
          "omniscienceAccuracy": 45.5,
          "omniscienceHallucinationRate": 90.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 55.1
        },
        "math": {}
      }
    },
    {
      "slug": "o3-mini",
      "canonicalModelKey": "o3-mini",
      "model": "o3-mini",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 55,
      "rankingEligible": true,
      "overallRank": 64,
      "url": "https://benchlm.ai/models/o3-mini",
      "markdownUrl": "https://benchlm.ai/md/models/o3-mini.md",
      "id": 41,
      "releaseDate": "2025-01-31",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "o3",
        "familyName": "o3",
        "variantType": "mini",
        "snapshotLabel": null,
        "baseFamilyModelKey": "o3",
        "relatedModelKeys": [
          "o3",
          "o3-pro"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 55,
        "overallScore": 70,
        "rawOverallScore": 70,
        "verifiedDisplayScore": 64,
        "displayCategoryScores": {
          "agentic": 61.7,
          "coding": 49.4,
          "reasoning": 68,
          "multimodalGrounded": 65.6,
          "knowledge": 60.9,
          "multilingual": 44.8,
          "instructionFollowing": 85.3,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 49.3,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 77.2,
          "multilingual": null,
          "instructionFollowing": 93.9,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 64,
        "categoryRanks": {
          "agentic": 26,
          "reasoning": 27,
          "multimodalGrounded": 38,
          "multilingual": 64,
          "instructionFollowing": 24
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 5,
        "verifiedBenchmarkCount": 5,
        "rankableBenchmarkCount": 16,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau2Bench": 28.7
        },
        "coding": {
          "sweVerified": 49.3,
          "aaCodingIndex": 17.86,
          "terminalBenchHard": 6.8,
          "aaSciCode": 39.9
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "mmlu": 86.9,
          "gpqa": 77.2,
          "artificialAnalysis": 25.86,
          "aaGpqaDiamond": 74.8,
          "aaHle": 8.7
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 93.9
        },
        "math": {
          "aime2024": 87.3
        }
      }
    },
    {
      "slug": "minimax-m2-7",
      "canonicalModelKey": "minimax-m2-7",
      "model": "MiniMax M2.7",
      "creator": "MiniMax",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 53,
      "rankingEligible": true,
      "overallRank": 65,
      "url": "https://benchlm.ai/models/minimax-m2-7",
      "markdownUrl": "https://benchlm.ai/md/models/minimax-m2-7.md",
      "id": 117,
      "releaseDate": "2026-03-18",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "minimax-m2-7",
        "familyName": "MiniMax M2.7",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "minimax-m2-7",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "minimax-m2-5"
      },
      "scores": {
        "displayScore": 53,
        "overallScore": 53,
        "rawOverallScore": 53,
        "verifiedDisplayScore": 55,
        "displayCategoryScores": {
          "agentic": 48.6,
          "coding": 54,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 50.3,
          "multilingual": null,
          "instructionFollowing": 76.3,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 57,
          "coding": 53.7,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 65,
        "categoryRanks": {
          "coding": 54,
          "instructionFollowing": 46
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": true,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 17,
        "verifiedBenchmarkCount": 17,
        "rankableBenchmarkCount": 31,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 57,
          "tau2Bench": 84.8,
          "toolathlon": 46.3,
          "mleBenchLite": 66.6,
          "mmClawBench": 62.7,
          "clawEval": 48.7,
          "aaAgenticIndex": 61.49,
          "apexAgentsAa": 10.6,
          "gdpvalAaNormalized": 50.2,
          "gdpvalAa": 1505,
          "gertLabs": 40.4
        },
        "coding": {
          "sweVerifiedArcee": 75.4,
          "swePro": 56.2,
          "sweRebench": 51.9,
          "sweMultilingual": 76.5,
          "multiSweBench": 52.7,
          "vibePro": 55.6,
          "nl2Repo": 39.8,
          "vibeCodeBench": 27.037,
          "reactNativeEvals": 71.4,
          "aaCodingIndex": 41.93,
          "terminalBenchHard": 39.4,
          "aaSciCode": 47
        },
        "reasoning": {
          "lcr": 68.7,
          "critpt": 0.6
        },
        "multimodalGrounded": {
          "gdpvalAa": 1495,
          "designArenaWebsite": 1287
        },
        "knowledge": {
          "gpqaDiamond": 87,
          "mmluProArcee": 80.8,
          "artificialAnalysis": 49.62,
          "aaGpqaDiamond": 87.4,
          "aaHle": 28.1,
          "aaOmniscienceIndex": 0.7,
          "omniscienceAccuracy": 26.1,
          "omniscienceHallucinationRate": 34.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 75.7
        },
        "math": {
          "aime2025Arcee": 80
        }
      }
    },
    {
      "slug": "deepseek-coder-2-0",
      "canonicalModelKey": "deepseek-coder-2-0",
      "model": "DeepSeek Coder 2.0",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 51,
      "rankingEligible": true,
      "overallRank": 66,
      "url": "https://benchlm.ai/models/deepseek-coder-2-0",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
      "id": 61,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-coder-2-0",
        "familyName": "DeepSeek Coder 2.0",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "deepseek-coder-2-0",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 51,
        "overallScore": 62,
        "rawOverallScore": 62,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 59.8,
          "coding": 60.9,
          "reasoning": 59.5,
          "multimodalGrounded": 36.2,
          "knowledge": 55.6,
          "multilingual": 60.6,
          "instructionFollowing": 69.6,
          "math": 70.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 66,
        "categoryRanks": {
          "agentic": 30,
          "coding": 48,
          "reasoning": 36,
          "multimodalGrounded": 80,
          "knowledge": 57,
          "multilingual": 53,
          "instructionFollowing": 54,
          "math": 32
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "claude-4-1-opus",
      "canonicalModelKey": "claude-4-1-opus",
      "model": "Claude 4.1 Opus",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 51,
      "rankingEligible": true,
      "overallRank": 67,
      "url": "https://benchlm.ai/models/claude-4-1-opus",
      "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
      "id": 69,
      "releaseDate": "2025-08-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-4-1-opus",
        "familyName": "Claude 4.1 Opus",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-4-1-opus",
        "relatedModelKeys": [
          "claude-4-1-opus-thinking"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 51,
        "overallScore": 59,
        "rawOverallScore": 59,
        "verifiedDisplayScore": 75,
        "displayCategoryScores": {
          "agentic": 38.8,
          "coding": 56.4,
          "reasoning": 55.7,
          "multimodalGrounded": 77.3,
          "knowledge": 51.8,
          "multilingual": 66.2,
          "instructionFollowing": 58.6,
          "math": 64.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 74.5,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 67,
        "categoryRanks": {
          "agentic": 62,
          "coding": 52,
          "reasoning": 43,
          "multimodalGrounded": 20,
          "knowledge": 59,
          "multilingual": 41,
          "instructionFollowing": 72,
          "math": 39
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "sweVerified": 74.5
        },
        "reasoning": {},
        "multimodalGrounded": {
          "designArenaWebsite": 1222
        },
        "knowledge": {
          "artificialAnalysis": 36
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "deepseek-llm-2-0",
      "canonicalModelKey": "deepseek-llm-2-0",
      "model": "DeepSeek LLM 2.0",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 50,
      "rankingEligible": true,
      "overallRank": 68,
      "url": "https://benchlm.ai/models/deepseek-llm-2-0",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
      "id": 79,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-llm-2-0",
        "familyName": "DeepSeek LLM 2.0",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "deepseek-llm-2-0",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 50,
        "overallScore": 54,
        "rawOverallScore": 54,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 49.4,
          "coding": 45.6,
          "reasoning": 58.4,
          "multimodalGrounded": 47.5,
          "knowledge": 53.4,
          "multilingual": 57.8,
          "instructionFollowing": 65.9,
          "math": 70.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 68,
        "categoryRanks": {
          "agentic": 47,
          "coding": 64,
          "reasoning": 41,
          "multimodalGrounded": 72,
          "knowledge": 58,
          "multilingual": 57,
          "instructionFollowing": 59,
          "math": 33
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "qwen2-5-1m",
      "canonicalModelKey": "qwen2-5-1m",
      "model": "Qwen2.5-1M",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 50,
      "rankingEligible": true,
      "overallRank": 69,
      "url": "https://benchlm.ai/models/qwen2-5-1m",
      "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
      "id": 54,
      "releaseDate": "2025-01-27",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen2-5-1m",
        "familyName": "Qwen2.5-1M",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen2-5-1m",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 50,
        "overallScore": 63,
        "rawOverallScore": 63,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 54.6,
          "coding": 48.6,
          "reasoning": 70.6,
          "multimodalGrounded": 54.3,
          "knowledge": 56.6,
          "multilingual": 62.3,
          "instructionFollowing": 62.3,
          "math": 76.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 69,
        "categoryRanks": {
          "agentic": 39,
          "coding": 60,
          "reasoning": 24,
          "multimodalGrounded": 64,
          "knowledge": 54,
          "multilingual": 50,
          "instructionFollowing": 65,
          "math": 26
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "claude-4-sonnet",
      "canonicalModelKey": "claude-4-sonnet",
      "model": "Claude 4 Sonnet",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 50,
      "rankingEligible": true,
      "overallRank": 70,
      "url": "https://benchlm.ai/models/claude-4-sonnet",
      "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
      "id": 67,
      "releaseDate": "2025-05-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-4-sonnet",
        "familyName": "Claude 4 Sonnet",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-4-sonnet",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 50,
        "overallScore": 59,
        "rawOverallScore": 59,
        "verifiedDisplayScore": 73,
        "displayCategoryScores": {
          "agentic": 43.8,
          "coding": 53.4,
          "reasoning": 54.7,
          "multimodalGrounded": 75.5,
          "knowledge": 50.1,
          "multilingual": 67,
          "instructionFollowing": 58.6,
          "math": 61.1
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 72.7,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 70,
        "categoryRanks": {
          "agentic": 54,
          "coding": 55,
          "reasoning": 48,
          "multimodalGrounded": 23,
          "knowledge": 61,
          "multilingual": 40,
          "instructionFollowing": 73,
          "math": 41
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 39.21,
          "tau2Bench": 52.3,
          "gdpvalAaNormalized": 31.2,
          "gdpvalAa": 1123,
          "gertLabs": 39.66
        },
        "coding": {
          "sweVerified": 72.7,
          "aaCodingIndex": 30.6,
          "terminalBenchHard": 27.3,
          "aaSciCode": 37.3
        },
        "reasoning": {
          "lcr": 44.3,
          "critpt": 1.1
        },
        "multimodalGrounded": {
          "aaMmmuPro": 62.4,
          "designArenaWebsite": 1191
        },
        "knowledge": {
          "artificialAnalysis": 33,
          "aaGpqaDiamond": 68.3,
          "aaHle": 4,
          "aaOmniscienceIndex": -9.2,
          "omniscienceAccuracy": 22.4,
          "omniscienceHallucinationRate": 40.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 45.4
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-4o-mini",
      "canonicalModelKey": "gpt-4o-mini",
      "model": "GPT-4o mini",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 49,
      "rankingEligible": true,
      "overallRank": 71,
      "url": "https://benchlm.ai/models/gpt-4o-mini",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4o-mini.md",
      "id": 101,
      "releaseDate": "2024-07-18",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-4o",
        "familyName": "GPT-4o",
        "variantType": "mini",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-4o",
        "relatedModelKeys": [
          "gpt-4o"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 49,
        "overallScore": 49,
        "rawOverallScore": 49,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 39,
          "coding": 90.9,
          "reasoning": 20.6,
          "multimodalGrounded": 41.2,
          "knowledge": 49,
          "multilingual": 46.3,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 71,
        "categoryRanks": {
          "agentic": 59,
          "reasoning": 81,
          "multimodalGrounded": 75,
          "multilingual": 62
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 14,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "aaSciCode": 22.9
        },
        "reasoning": {},
        "multimodalGrounded": {
          "aaMmmuPro": 41.5
        },
        "knowledge": {
          "artificialAnalysis": 12.65,
          "aaGpqaDiamond": 42.6,
          "aaHle": 4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 31
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "qwen2-5-72b",
      "canonicalModelKey": "qwen2-5-72b",
      "model": "Qwen2.5-72B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 49,
      "rankingEligible": true,
      "overallRank": 72,
      "url": "https://benchlm.ai/models/qwen2-5-72b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
      "id": 73,
      "releaseDate": "2024-09-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen2-5-72b",
        "familyName": "Qwen2.5-72B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen2-5-72b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 49,
        "overallScore": 57,
        "rawOverallScore": 57,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 46.4,
          "coding": 46.8,
          "reasoning": 60.1,
          "multimodalGrounded": 51.7,
          "knowledge": 56.4,
          "multilingual": 63.4,
          "instructionFollowing": 65.9,
          "math": 76.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 72,
        "categoryRanks": {
          "agentic": 49,
          "coding": 63,
          "reasoning": 35,
          "multimodalGrounded": 67,
          "knowledge": 55,
          "multilingual": 47,
          "instructionFollowing": 60,
          "math": 27
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "deepseekmath-v2",
      "canonicalModelKey": "deepseekmath-v2",
      "model": "DeepSeekMath V2",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 49,
      "rankingEligible": true,
      "overallRank": 73,
      "url": "https://benchlm.ai/models/deepseekmath-v2",
      "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
      "id": 62,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseekmath",
        "familyName": "DeepSeekMath",
        "variantType": "snapshot",
        "snapshotLabel": "V2",
        "baseFamilyModelKey": "deepseekmath-v2",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 49,
        "overallScore": 61,
        "rawOverallScore": 61,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 53.4,
          "coding": 51.3,
          "reasoning": 60.2,
          "multimodalGrounded": 53.9,
          "knowledge": 57.3,
          "multilingual": 68.1,
          "instructionFollowing": 58.6,
          "math": 74.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 73,
        "categoryRanks": {
          "agentic": 43,
          "coding": 58,
          "reasoning": 34,
          "multimodalGrounded": 66,
          "knowledge": 53,
          "multilingual": 36,
          "instructionFollowing": 74,
          "math": 29
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "mistral-large-3",
      "canonicalModelKey": "mistral-large-3",
      "model": "Mistral Large 3",
      "creator": "Mistral",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 48,
      "rankingEligible": true,
      "overallRank": 74,
      "url": "https://benchlm.ai/models/mistral-large-3",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
      "id": 99,
      "releaseDate": "2025-12-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-large-3",
        "familyName": "Mistral Large 3",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mistral-large-3",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 48,
        "overallScore": 49,
        "rawOverallScore": 49,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 37.4,
          "coding": 37.5,
          "reasoning": 52,
          "multimodalGrounded": 67.7,
          "knowledge": 41.1,
          "multilingual": 57.8,
          "instructionFollowing": 67,
          "math": 64.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 74,
        "categoryRanks": {
          "agentic": 65,
          "coding": 69,
          "reasoning": 51,
          "multimodalGrounded": 33,
          "knowledge": 73,
          "multilingual": 58,
          "instructionFollowing": 56,
          "math": 40
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 36,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 21.7,
          "tau2Bench": 24.6,
          "gdpvalAaNormalized": 18.2,
          "gdpvalAa": 864
        },
        "coding": {
          "aaCodingIndex": 22.68,
          "terminalBenchHard": 15.9,
          "aaSciCode": 36.2
        },
        "reasoning": {
          "lcr": 34.7,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 55.7
        },
        "knowledge": {
          "artificialAnalysis": 22.8,
          "aaGpqaDiamond": 68,
          "aaHle": 4.1,
          "aaOmniscienceIndex": -39.4,
          "omniscienceAccuracy": 24.1,
          "omniscienceHallucinationRate": 83.7
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 36.2
        },
        "math": {}
      }
    },
    {
      "slug": "gemini-3-1-flash-lite",
      "canonicalModelKey": "gemini-3-1-flash-lite",
      "model": "Gemini 3.1 Flash-Lite",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 47,
      "rankingEligible": true,
      "overallRank": 75,
      "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
      "id": 113,
      "releaseDate": "2026-03-03",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-3-1-flash-lite",
        "familyName": "Gemini 3.1 Flash-Lite",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-3-1-flash-lite",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 47,
        "overallScore": 38,
        "rawOverallScore": 38,
        "verifiedDisplayScore": 73,
        "displayCategoryScores": {
          "agentic": 41.2,
          "coding": 25.8,
          "reasoning": 58.8,
          "multimodalGrounded": 58.3,
          "knowledge": 35.5,
          "multilingual": 32.7,
          "instructionFollowing": 44,
          "math": 43.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 73.2,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 75,
        "categoryRanks": {
          "agentic": 57,
          "coding": 75,
          "reasoning": 38,
          "multimodalGrounded": 57,
          "knowledge": 78,
          "multilingual": 77,
          "instructionFollowing": 90,
          "math": 57
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 2,
        "verifiedBenchmarkCount": 2,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 25.67,
          "apexAgentsAa": 12.2,
          "tau2Bench": 31.3,
          "gdpvalAaNormalized": 21.3,
          "gdpvalAa": 926,
          "gertLabs": 38.46
        },
        "coding": {
          "vibeCodeBench": 0,
          "aaCodingIndex": 30.13,
          "terminalBenchHard": 24.2,
          "aaSciCode": 41.9
        },
        "reasoning": {
          "lcr": 65.3,
          "critpt": 1.1
        },
        "multimodalGrounded": {
          "charxiv": 73.2,
          "aaMmmuPro": 75.5
        },
        "knowledge": {
          "artificialAnalysis": 33.52,
          "aaGpqaDiamond": 82.2,
          "aaHle": 16.2,
          "aaOmniscienceIndex": -15.5,
          "omniscienceAccuracy": 36.4,
          "omniscienceHallucinationRate": 81.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 77.2
        },
        "math": {}
      }
    },
    {
      "slug": "qwen3-235b-2507-reasoning",
      "canonicalModelKey": "qwen3-235b-2507-reasoning",
      "model": "Qwen3 235B 2507 (Reasoning)",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 45,
      "rankingEligible": true,
      "overallRank": 76,
      "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
      "id": 119,
      "releaseDate": "2025-07-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-235b-2507",
        "familyName": "Qwen3 235B 2507",
        "variantType": "reasoning",
        "snapshotLabel": "2507",
        "baseFamilyModelKey": "qwen3-235b-2507",
        "relatedModelKeys": [
          "qwen3-235b-2507"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 45,
        "overallScore": 34,
        "rawOverallScore": 34,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 38.9,
          "coding": 41.5,
          "reasoning": 35.9,
          "multimodalGrounded": 7.3,
          "knowledge": 43.5,
          "multilingual": 45.6,
          "instructionFollowing": 76.2,
          "math": 40.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 76,
        "categoryRanks": {
          "agentic": 60,
          "coding": 67,
          "reasoning": 66,
          "multimodalGrounded": 98,
          "knowledge": 71,
          "multilingual": 63,
          "instructionFollowing": 47,
          "math": 60
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-4-1-mini",
      "canonicalModelKey": "gpt-4-1-mini",
      "model": "GPT-4.1 mini",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 45,
      "rankingEligible": true,
      "overallRank": 77,
      "url": "https://benchlm.ai/models/gpt-4-1-mini",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-mini.md",
      "id": 91,
      "releaseDate": "2025-04-14",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-4-1",
        "familyName": "GPT-4.1",
        "variantType": "mini",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-4-1",
        "relatedModelKeys": [
          "gpt-4-1",
          "gpt-4-1-nano"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "gpt-4o-mini"
      },
      "scores": {
        "displayScore": 45,
        "overallScore": 51,
        "rawOverallScore": 51,
        "verifiedDisplayScore": 46,
        "displayCategoryScores": {
          "agentic": 43.3,
          "coding": 24.3,
          "reasoning": 80.3,
          "multimodalGrounded": 56.8,
          "knowledge": 49.4,
          "multilingual": 42,
          "instructionFollowing": 78.8,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 23.6,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 64.2,
          "multilingual": null,
          "instructionFollowing": 88.5,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 77,
        "categoryRanks": {
          "agentic": 56,
          "reasoning": 16,
          "multimodalGrounded": 60,
          "multilingual": 67,
          "instructionFollowing": 41
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 17,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 25.15,
          "tau2Bench": 52.9,
          "gdpvalAaNormalized": 6,
          "gdpvalAa": 619
        },
        "coding": {
          "sweVerified": 23.6,
          "aaCodingIndex": 18.52,
          "terminalBenchHard": 7.6,
          "aaSciCode": 40.4
        },
        "reasoning": {
          "lcr": 42.3,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 58.7,
          "designArenaWebsite": 1043
        },
        "knowledge": {
          "mmlu": 87.5,
          "gpqa": 64.2,
          "artificialAnalysis": 22.9,
          "aaGpqaDiamond": 66.4,
          "aaHle": 4.6,
          "aaOmniscienceIndex": -50.1,
          "omniscienceAccuracy": 17.5,
          "omniscienceHallucinationRate": 82
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 88.5,
          "aaIfBench": 38.3
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "nemotron-3-super-100b",
      "canonicalModelKey": "nemotron-3-super-100b",
      "model": "Nemotron 3 Super 100B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 43,
      "rankingEligible": true,
      "overallRank": 78,
      "url": "https://benchlm.ai/models/nemotron-3-super-100b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
      "id": 92,
      "releaseDate": "2026-01-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "nemotron-3-super-100b",
        "familyName": "Nemotron 3 Super 100B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "nemotron-3-super-100b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 43,
        "overallScore": 51,
        "rawOverallScore": 51,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 43.6,
          "coding": 43,
          "reasoning": 57,
          "multimodalGrounded": 40,
          "knowledge": 44,
          "multilingual": 59.7,
          "instructionFollowing": 62.3,
          "math": 52.5
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 78,
        "categoryRanks": {
          "agentic": 55,
          "coding": 66,
          "reasoning": 42,
          "multimodalGrounded": 77,
          "knowledge": 70,
          "multilingual": 54,
          "instructionFollowing": 66,
          "math": 47
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 5.5
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "o4-mini-high",
      "canonicalModelKey": "o4-mini-high",
      "model": "o4-mini (high)",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 43,
      "rankingEligible": true,
      "overallRank": 79,
      "url": "https://benchlm.ai/models/o4-mini-high",
      "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
      "id": 90,
      "releaseDate": "2025-04-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "o4-mini",
        "familyName": "o4-mini",
        "variantType": "reasoning",
        "snapshotLabel": "high",
        "baseFamilyModelKey": "o4-mini-high",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 43,
        "overallScore": 51,
        "rawOverallScore": 51,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 38.7,
          "coding": 45.5,
          "reasoning": 48.1,
          "multimodalGrounded": 50.3,
          "knowledge": 58,
          "multilingual": 66.1,
          "instructionFollowing": 58.6,
          "math": 54.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 79,
        "categoryRanks": {
          "agentic": 63,
          "coding": 65,
          "reasoning": 55,
          "multimodalGrounded": 68,
          "knowledge": 52,
          "multilingual": 42,
          "instructionFollowing": 75,
          "math": 45
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 39,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "claude-4-1-opus-thinking",
      "canonicalModelKey": "claude-4-1-opus-thinking",
      "model": "Claude 4.1 Opus Thinking",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 43,
      "rankingEligible": true,
      "overallRank": 80,
      "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
      "id": 98,
      "releaseDate": "2025-08-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-4-1-opus",
        "familyName": "Claude 4.1 Opus",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-4-1-opus",
        "relatedModelKeys": [
          "claude-4-1-opus"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 43,
        "overallScore": 48,
        "rawOverallScore": 48,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 24.9,
          "coding": 51.3,
          "reasoning": 49.2,
          "multimodalGrounded": 65.7,
          "knowledge": 35.2,
          "multilingual": 50.5,
          "instructionFollowing": 76.9,
          "math": 52.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 80,
        "categoryRanks": {
          "agentic": 75,
          "coding": 59,
          "reasoning": 54,
          "multimodalGrounded": 37,
          "knowledge": 80,
          "multilingual": 60,
          "instructionFollowing": 45,
          "math": 46
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau2Bench": 71.4
        },
        "coding": {
          "aaCodingIndex": 36.52,
          "terminalBenchHard": 34.3,
          "aaSciCode": 40.9
        },
        "reasoning": {
          "lcr": 66.3,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 67.9
        },
        "knowledge": {
          "artificialAnalysis": 42,
          "aaGpqaDiamond": 80.9,
          "aaHle": 11.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 55.4
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-4o",
      "canonicalModelKey": "gpt-4o",
      "model": "GPT-4o",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 42,
      "rankingEligible": true,
      "overallRank": 81,
      "url": "https://benchlm.ai/models/gpt-4o",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
      "id": 108,
      "releaseDate": "2024-05-13",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-4o",
        "familyName": "GPT-4o",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-4o",
        "relatedModelKeys": [
          "gpt-4o-mini"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 42,
        "overallScore": 42,
        "rawOverallScore": 42,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 38.9,
          "coding": 24.4,
          "reasoning": 46.8,
          "multimodalGrounded": 62.2,
          "knowledge": 35.6,
          "multilingual": 48.7,
          "instructionFollowing": 54.9,
          "math": 52
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 81,
        "categoryRanks": {
          "agentic": 61,
          "coding": 76,
          "reasoning": 57,
          "multimodalGrounded": 49,
          "knowledge": 77,
          "multilingual": 61,
          "instructionFollowing": 79,
          "math": 49
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 8.38,
          "tau2Bench": 25.1,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 348
        },
        "coding": {
          "aaCodingIndex": 16.67,
          "terminalBenchHard": 8.3,
          "aaSciCode": 33.3
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {
          "designArenaWebsite": 876
        },
        "knowledge": {
          "artificialAnalysis": 17.32,
          "aaGpqaDiamond": 54.3,
          "aaHle": 3.3,
          "aaOmniscienceIndex": -10.7,
          "omniscienceAccuracy": 19.7,
          "omniscienceHallucinationRate": 37.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 34.3
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "kimi-k2",
      "canonicalModelKey": "kimi-k2",
      "model": "Kimi K2",
      "creator": "Moonshot AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 41,
      "rankingEligible": true,
      "overallRank": 82,
      "url": "https://benchlm.ai/models/kimi-k2",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-k2.md",
      "id": 96,
      "releaseDate": "2025-07-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "kimi-k2",
        "familyName": "Kimi K2",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "kimi-k2",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 41,
        "overallScore": 55,
        "rawOverallScore": 55,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 30.2,
          "coding": 53.4,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 56,
          "multilingual": null,
          "instructionFollowing": 83.5,
          "math": 51.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 82,
        "categoryRanks": {
          "knowledge": 56,
          "instructionFollowing": 30,
          "math": 51
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 18,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau2Bench": 61.1,
          "aaAgenticIndex": 24.27,
          "gdpvalAaNormalized": 1.2,
          "gdpvalAa": 525
        },
        "coding": {
          "aaCodingIndex": 22.1,
          "terminalBenchHard": 15.9,
          "aaSciCode": 34.5
        },
        "reasoning": {
          "lcr": 51,
          "critpt": 0
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1096
        },
        "knowledge": {
          "artificialAnalysis": 26.32,
          "aaGpqaDiamond": 76.6,
          "aaHle": 7,
          "aaOmniscienceIndex": -27.5,
          "omniscienceAccuracy": 26.8,
          "omniscienceHallucinationRate": 74.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 41.5
        },
        "math": {}
      }
    },
    {
      "slug": "llama-3-1-405b",
      "canonicalModelKey": "llama-3-1-405b",
      "model": "Llama 3.1 405B",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 40,
      "rankingEligible": true,
      "overallRank": 83,
      "url": "https://benchlm.ai/models/llama-3-1-405b",
      "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
      "id": 97,
      "releaseDate": "2024-07-23",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "llama-3-1-405b",
        "familyName": "Llama 3.1 405B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "llama-3-1-405b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 40,
        "overallScore": 49,
        "rawOverallScore": 49,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 31.4,
          "coding": 36.5,
          "reasoning": 50.3,
          "multimodalGrounded": 43.8,
          "knowledge": 45.2,
          "multilingual": 61.6,
          "instructionFollowing": 69.6,
          "math": 58
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 83,
        "categoryRanks": {
          "agentic": 72,
          "coding": 70,
          "reasoning": 52,
          "multimodalGrounded": 74,
          "knowledge": 67,
          "multilingual": 51,
          "instructionFollowing": 55,
          "math": 42
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 6.34,
          "tau2Bench": 19,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 255
        },
        "coding": {
          "aaCodingIndex": 14.5,
          "terminalBenchHard": 6.8,
          "aaSciCode": 29.9
        },
        "reasoning": {
          "lcr": 24.3,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 17.38,
          "aaGpqaDiamond": 51.5,
          "aaHle": 4.2,
          "aaOmniscienceIndex": -17.3,
          "omniscienceAccuracy": 22.3,
          "omniscienceHallucinationRate": 51
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 39
        },
        "math": {}
      }
    },
    {
      "slug": "claude-3-5-sonnet",
      "canonicalModelKey": "claude-3-5-sonnet",
      "model": "Claude 3.5 Sonnet",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 40,
      "rankingEligible": true,
      "overallRank": 84,
      "url": "https://benchlm.ai/models/claude-3-5-sonnet",
      "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
      "id": 95,
      "releaseDate": "2024-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-3-5-sonnet",
        "familyName": "Claude 3.5 Sonnet",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-3-5-sonnet",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 40,
        "overallScore": 51,
        "rawOverallScore": 51,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 38.4,
          "coding": 40.3,
          "reasoning": 49.8,
          "multimodalGrounded": 66.9,
          "knowledge": 39.1,
          "multilingual": 62.5,
          "instructionFollowing": 58.6,
          "math": 50.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 84,
        "categoryRanks": {
          "agentic": 64,
          "coding": 68,
          "reasoning": 53,
          "multimodalGrounded": 35,
          "knowledge": 75,
          "multilingual": 49,
          "instructionFollowing": 76,
          "math": 52
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "sweVerified": 49
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "gpqa": 59.4
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "grok-code-fast-1",
      "canonicalModelKey": "grok-code-fast-1",
      "model": "Grok Code Fast 1",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 39,
      "rankingEligible": true,
      "overallRank": 85,
      "url": "https://benchlm.ai/models/grok-code-fast-1",
      "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
      "id": 93,
      "releaseDate": "2025-08-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-code-fast-1",
        "familyName": "Grok Code Fast 1",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-code-fast-1",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 39,
        "overallScore": 48,
        "rawOverallScore": 48,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 40.1,
          "coding": 61.6,
          "reasoning": 43.1,
          "multimodalGrounded": 21.2,
          "knowledge": 34.7,
          "multilingual": 43.7,
          "instructionFollowing": 44,
          "math": 45.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 85,
        "categoryRanks": {
          "agentic": 58,
          "coding": 47,
          "reasoning": 60,
          "multimodalGrounded": 88,
          "knowledge": 82,
          "multilingual": 65,
          "instructionFollowing": 91,
          "math": 55
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 35.63,
          "tau2Bench": 75.7,
          "gdpvalAaNormalized": 13.1,
          "gdpvalAa": 763
        },
        "coding": {
          "sweVerified": 70.8,
          "aaCodingIndex": 23.69,
          "terminalBenchHard": 17.4,
          "aaSciCode": 36.2
        },
        "reasoning": {
          "lcr": 48.3,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 28.74,
          "aaGpqaDiamond": 72.7,
          "aaHle": 7.5,
          "aaOmniscienceIndex": -36,
          "omniscienceAccuracy": 23.8,
          "omniscienceHallucinationRate": 78.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 41.4
        },
        "math": {}
      }
    },
    {
      "slug": "sarvam-105b",
      "canonicalModelKey": "sarvam-105b",
      "model": "Sarvam 105B",
      "creator": "Sarvam",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 39,
      "rankingEligible": true,
      "overallRank": 86,
      "url": "https://benchlm.ai/models/sarvam-105b",
      "markdownUrl": "https://benchlm.ai/md/models/sarvam-105b.md",
      "id": 115,
      "releaseDate": "2026-03-06",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "sarvam-105b",
        "familyName": "Sarvam 105B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "sarvam-105b",
        "relatedModelKeys": [
          "sarvam-30b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "sarvam-m"
      },
      "scores": {
        "displayScore": 39,
        "overallScore": 45,
        "rawOverallScore": 45,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 28.9,
          "coding": 36.9,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 49.2,
          "multilingual": null,
          "instructionFollowing": 65.2,
          "math": 90.4
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 86,
        "categoryRanks": {
          "instructionFollowing": 61,
          "math": 14
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 12,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 24.69,
          "tau2Bench": 46.8,
          "gdpvalAaNormalized": 11.9,
          "gdpvalAa": 738
        },
        "coding": {
          "aaCodingIndex": 9.81,
          "terminalBenchHard": 1.5,
          "aaSciCode": 26.4
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 18.16,
          "aaGpqaDiamond": 73.8,
          "aaHle": 10.1,
          "aaOmniscienceIndex": -59.5,
          "omniscienceAccuracy": 17.6,
          "omniscienceHallucinationRate": 93.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 34.4
        },
        "math": {}
      }
    },
    {
      "slug": "mistral-large-2",
      "canonicalModelKey": "mistral-large-2",
      "model": "Mistral Large 2",
      "creator": "Mistral",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 38,
      "rankingEligible": true,
      "overallRank": 87,
      "url": "https://benchlm.ai/models/mistral-large-2",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
      "id": 100,
      "releaseDate": "2024-07-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-large-2",
        "familyName": "Mistral Large 2",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mistral-large-2",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 38,
        "overallScore": 48,
        "rawOverallScore": 48,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 32.5,
          "coding": 32.6,
          "reasoning": 44.8,
          "multimodalGrounded": 41.1,
          "knowledge": 47.7,
          "multilingual": 58.7,
          "instructionFollowing": 50.6,
          "math": 47.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 87,
        "categoryRanks": {
          "agentic": 71,
          "coding": 71,
          "reasoning": 58,
          "multimodalGrounded": 76,
          "knowledge": 65,
          "multilingual": 56,
          "instructionFollowing": 83,
          "math": 53
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 10.23,
          "tau2Bench": 30.7,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 323
        },
        "coding": {
          "aaCodingIndex": 13.76,
          "terminalBenchHard": 6.1,
          "aaSciCode": 29.2
        },
        "reasoning": {
          "lcr": 5.3,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 15.09,
          "aaGpqaDiamond": 48.6,
          "aaHle": 4,
          "aaOmniscienceIndex": -34,
          "omniscienceAccuracy": 20.1,
          "omniscienceHallucinationRate": 67.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 31.2
        },
        "math": {}
      }
    },
    {
      "slug": "gemini-2-5-flash",
      "canonicalModelKey": "gemini-2-5-flash",
      "model": "Gemini 2.5 Flash",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 37,
      "rankingEligible": true,
      "overallRank": 88,
      "url": "https://benchlm.ai/models/gemini-2-5-flash",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
      "id": 125,
      "releaseDate": "2025-06-17",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-2-5-flash",
        "familyName": "Gemini 2.5 Flash",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-2-5-flash",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 37,
        "overallScore": 32,
        "rawOverallScore": 32,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 33,
          "coding": 16.5,
          "reasoning": 44,
          "multimodalGrounded": 54,
          "knowledge": 26.6,
          "multilingual": 35.5,
          "instructionFollowing": 41.8,
          "math": 27.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 88,
        "categoryRanks": {
          "agentic": 69,
          "coding": 85,
          "reasoning": 59,
          "multimodalGrounded": 65,
          "knowledge": 88,
          "multilingual": 73,
          "instructionFollowing": 94,
          "math": 72
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 15.01,
          "tau2Bench": 14.9,
          "gdpvalAaNormalized": 11.9,
          "gdpvalAa": 739
        },
        "coding": {
          "aaCodingIndex": 17.76,
          "terminalBenchHard": 12.1,
          "aaSciCode": 29.1
        },
        "reasoning": {
          "lcr": 45.9,
          "critpt": 1.4
        },
        "multimodalGrounded": {
          "aaMmmuPro": 65.5,
          "designArenaWebsite": 1160
        },
        "knowledge": {
          "artificialAnalysis": 20.56,
          "aaGpqaDiamond": 68.3,
          "aaHle": 5.1,
          "aaOmniscienceIndex": -42,
          "omniscienceAccuracy": 26.5,
          "omniscienceHallucinationRate": 93.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 39
        },
        "math": {}
      }
    },
    {
      "slug": "gemini-1-5-pro",
      "canonicalModelKey": "gemini-1-5-pro",
      "model": "Gemini 1.5 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 35,
      "rankingEligible": true,
      "overallRank": 89,
      "url": "https://benchlm.ai/models/gemini-1-5-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
      "id": 114,
      "releaseDate": "2024-02-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-1-5-pro",
        "familyName": "Gemini 1.5 Pro",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-1-5-pro",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 35,
        "overallScore": 38,
        "rawOverallScore": 38,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 33.7,
          "coding": 14.9,
          "reasoning": 52.8,
          "multimodalGrounded": 65.5,
          "knowledge": 35.5,
          "multilingual": 31.9,
          "instructionFollowing": 36.6,
          "math": 45.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 89,
        "categoryRanks": {
          "agentic": 67,
          "coding": 87,
          "reasoning": 50,
          "multimodalGrounded": 39,
          "knowledge": 79,
          "multilingual": 80,
          "instructionFollowing": 100,
          "math": 56
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "aaCodingIndex": 23.63,
          "aaSciCode": 29.5
        },
        "reasoning": {},
        "multimodalGrounded": {
          "aaMmmuPro": 55
        },
        "knowledge": {
          "artificialAnalysis": 15.99,
          "aaGpqaDiamond": 58.9,
          "aaHle": 4.9
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "deepseek-v3",
      "canonicalModelKey": "deepseek-v3",
      "model": "DeepSeek V3",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 34,
      "rankingEligible": true,
      "overallRank": 90,
      "url": "https://benchlm.ai/models/deepseek-v3",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3.md",
      "id": 112,
      "releaseDate": "2024-12-26",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek",
        "familyName": "DeepSeek",
        "variantType": "snapshot",
        "snapshotLabel": "V3",
        "baseFamilyModelKey": "deepseek-v3",
        "relatedModelKeys": [
          "deepseek-v3-1",
          "deepseek-v3-2"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 34,
        "overallScore": 39,
        "rawOverallScore": 39,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 28.5,
          "reasoning": 16.5,
          "multimodalGrounded": null,
          "knowledge": 42.7,
          "multilingual": null,
          "instructionFollowing": 61.2,
          "math": 69.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 90,
        "categoryRanks": {
          "knowledge": 72,
          "instructionFollowing": 69
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 10,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 8.83,
          "tau2Bench": 22.8,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 409
        },
        "coding": {
          "liveCodeBench": 37.6,
          "sweVerified": 42,
          "aaCodingIndex": 16.35,
          "terminalBenchHard": 6.8,
          "aaSciCode": 35.4
        },
        "reasoning": {
          "lcr": 29,
          "critpt": 0
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1165
        },
        "knowledge": {
          "gpqa": 59.1,
          "mmluPro": 75.9,
          "artificialAnalysis": 16.46,
          "aaGpqaDiamond": 55.7,
          "aaHle": 3.6,
          "aaOmniscienceIndex": -41.3,
          "omniscienceAccuracy": 25.4,
          "omniscienceHallucinationRate": 89.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 86.1,
          "aaIfBench": 34.8
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-oss-120b",
      "canonicalModelKey": "gpt-oss-120b",
      "model": "GPT-OSS 120B",
      "creator": "OpenAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 34,
      "rankingEligible": true,
      "overallRank": 91,
      "url": "https://benchlm.ai/models/gpt-oss-120b",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
      "id": 129,
      "releaseDate": "2025-08-05",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-oss",
        "familyName": "GPT-OSS",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-oss-120b",
        "relatedModelKeys": [
          "gpt-oss-20b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 34,
        "overallScore": 28,
        "rawOverallScore": 28,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 34.4,
          "coding": 21.4,
          "reasoning": 30.2,
          "multimodalGrounded": 18.9,
          "knowledge": 44.6,
          "multilingual": 35.3,
          "instructionFollowing": 38.5,
          "math": 26.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 91,
        "categoryRanks": {
          "agentic": 66,
          "coding": 78,
          "reasoning": 70,
          "multimodalGrounded": 91,
          "knowledge": 69,
          "multilingual": 74,
          "instructionFollowing": 98,
          "math": 73
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 35,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 37.87,
          "apexAgentsAa": 3.1,
          "tau2Bench": 65.8,
          "gdpvalAaNormalized": 22.4,
          "gdpvalAa": 947,
          "gertLabs": 29.61
        },
        "coding": {
          "reactNativeEvals": 71.6,
          "aaCodingIndex": 28.62,
          "terminalBenchHard": 23.5,
          "aaSciCode": 38.9
        },
        "reasoning": {
          "lcr": 50.7,
          "critpt": 1.1
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1013
        },
        "knowledge": {
          "artificialAnalysis": 33.27,
          "aaGpqaDiamond": 78.2,
          "aaHle": 18.5,
          "aaOmniscienceIndex": -50,
          "omniscienceAccuracy": 21.5,
          "omniscienceHallucinationRate": 91.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 69
        },
        "math": {}
      }
    },
    {
      "slug": "claude-3-opus",
      "canonicalModelKey": "claude-3-opus",
      "model": "Claude 3 Opus",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 34,
      "rankingEligible": true,
      "overallRank": 92,
      "url": "https://benchlm.ai/models/claude-3-opus",
      "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
      "id": 124,
      "releaseDate": "2024-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-3-opus",
        "familyName": "Claude 3 Opus",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-3-opus",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 34,
        "overallScore": 33,
        "rawOverallScore": 33,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 33.4,
          "coding": 13.4,
          "reasoning": 43,
          "multimodalGrounded": 58.9,
          "knowledge": 31.5,
          "multilingual": 32.7,
          "instructionFollowing": 36.6,
          "math": 42
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 92,
        "categoryRanks": {
          "agentic": 68,
          "coding": 88,
          "reasoning": 61,
          "multimodalGrounded": 55,
          "knowledge": 84,
          "multilingual": 78,
          "instructionFollowing": 101,
          "math": 59
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "aaCodingIndex": 19.53,
          "aaSciCode": 23.3
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 18,
          "aaGpqaDiamond": 48.9,
          "aaHle": 3.1
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "minicpm5-1b",
      "canonicalModelKey": "minicpm5-1b",
      "model": "MiniCPM5-1B",
      "creator": "OpenBMB",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "131K",
      "contextWindowTokens": 131000,
      "displayScore": 34,
      "rankingEligible": true,
      "overallRank": 93,
      "url": "https://benchlm.ai/models/minicpm5-1b",
      "markdownUrl": "https://benchlm.ai/md/models/minicpm5-1b.md",
      "id": 123,
      "releaseDate": "2026-05-25",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "minicpm5",
        "familyName": "MiniCPM5",
        "variantType": "1b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "minicpm5-1b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 34,
        "overallScore": 34,
        "rawOverallScore": 34,
        "verifiedDisplayScore": 51,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 31.9,
          "math": 37
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 39.8,
          "multilingual": null,
          "instructionFollowing": 68.6,
          "math": 59.6
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": true,
        "overallRank": 93,
        "categoryRanks": {
          "instructionFollowing": 110,
          "math": 64
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 14,
        "verifiedBenchmarkCount": 14,
        "rankableBenchmarkCount": 14,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "bfclV4": 25.15
        },
        "coding": {
          "liveCodeBenchPro": 22.68,
          "liveCodeBenchV6": 33.52
        },
        "reasoning": {
          "bbh": 71.89
        },
        "multimodalGrounded": {},
        "knowledge": {
          "mmluPro": 48.85,
          "mmluRedux": 70.06,
          "gpqaDiamond": 26.26,
          "superGpqa": 23.14
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifBench": 46.67,
          "ifeval": 80.41
        },
        "math": {
          "aime2025": 40.42,
          "aime2026": 40.42,
          "hmmtFeb2026": 25.76,
          "math500": 91.6
        }
      }
    },
    {
      "slug": "deepseek-r1",
      "canonicalModelKey": "deepseek-r1",
      "model": "DeepSeek-R1",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 32,
      "rankingEligible": true,
      "overallRank": 94,
      "url": "https://benchlm.ai/models/deepseek-r1",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
      "id": 142,
      "releaseDate": "2025-01-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-r1",
        "familyName": "DeepSeek-R1",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "deepseek-r1",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 32,
        "overallScore": 26,
        "rawOverallScore": 26,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 31.2,
          "coding": 26.4,
          "reasoning": 23.5,
          "multimodalGrounded": 17,
          "knowledge": 38.9,
          "multilingual": 6.4,
          "instructionFollowing": 54.6,
          "math": 33.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 94,
        "categoryRanks": {
          "agentic": 73,
          "coding": 74,
          "reasoning": 80,
          "multimodalGrounded": 94,
          "knowledge": 76,
          "multilingual": 91,
          "instructionFollowing": 80,
          "math": 67
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 20.84,
          "tau2Bench": 36.5,
          "gdpvalAaNormalized": 9,
          "gdpvalAa": 680
        },
        "coding": {
          "aaCodingIndex": 24.03,
          "terminalBenchHard": 15.9,
          "aaSciCode": 40.3
        },
        "reasoning": {
          "lcr": 54.7,
          "critpt": 1.4
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 27.07,
          "aaGpqaDiamond": 81.3,
          "aaHle": 14.9,
          "aaOmniscienceIndex": -27.1,
          "omniscienceAccuracy": 31,
          "omniscienceHallucinationRate": 84
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 39.6
        },
        "math": {}
      }
    },
    {
      "slug": "qwen3-235b-2507",
      "canonicalModelKey": "qwen3-235b-2507",
      "model": "Qwen3 235B 2507",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 32,
      "rankingEligible": true,
      "overallRank": 95,
      "url": "https://benchlm.ai/models/qwen3-235b-2507",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
      "id": 144,
      "releaseDate": "2025-07-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-235b-2507",
        "familyName": "Qwen3 235B 2507",
        "variantType": "base",
        "snapshotLabel": "2507",
        "baseFamilyModelKey": "qwen3-235b-2507",
        "relatedModelKeys": [
          "qwen3-235b-2507-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 32,
        "overallScore": 23,
        "rawOverallScore": 23,
        "verifiedDisplayScore": 77,
        "displayCategoryScores": {
          "agentic": 16.4,
          "coding": 22.8,
          "reasoning": 25.6,
          "multimodalGrounded": 6.5,
          "knowledge": 40,
          "multilingual": 43.7,
          "instructionFollowing": 79.5,
          "math": 24.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 76.2,
          "multilingual": 79.4,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 95,
        "categoryRanks": {
          "agentic": 91,
          "coding": 77,
          "reasoning": 78,
          "multimodalGrounded": 100,
          "knowledge": 74,
          "multilingual": 66,
          "instructionFollowing": 40,
          "math": 74
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "gpqa": 77.5,
          "superGpqa": 62.6,
          "mmluPro": 83
        },
        "multilingual": {
          "mmluProX": 79.4
        },
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "dbrx-instruct",
      "canonicalModelKey": "dbrx-instruct",
      "model": "DBRX Instruct",
      "creator": "Databricks",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 32,
      "rankingEligible": true,
      "overallRank": 96,
      "url": "https://benchlm.ai/models/dbrx-instruct",
      "markdownUrl": "https://benchlm.ai/md/models/dbrx-instruct.md",
      "id": 154,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "dbrx",
        "familyName": "DBRX",
        "variantType": "instruct",
        "snapshotLabel": null,
        "baseFamilyModelKey": "dbrx-instruct",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 32,
        "overallScore": 32,
        "rawOverallScore": 32,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 8.5,
          "coding": 60,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 29.4,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 96,
        "categoryRanks": {
          "agentic": 98,
          "reasoning": 90,
          "multimodalGrounded": 107,
          "multilingual": 99
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 13,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "grok-3-beta",
      "canonicalModelKey": "grok-3-beta",
      "model": "Grok 3 [Beta]",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 30,
      "rankingEligible": true,
      "overallRank": 97,
      "url": "https://benchlm.ai/models/grok-3-beta",
      "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
      "id": 149,
      "releaseDate": "2025-02-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-3",
        "familyName": "Grok 3",
        "variantType": "snapshot",
        "snapshotLabel": "beta",
        "baseFamilyModelKey": "grok-3-beta",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 30,
        "overallScore": 18,
        "rawOverallScore": 18,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 21,
          "coding": 28.2,
          "reasoning": 28.6,
          "multimodalGrounded": 9.4,
          "knowledge": 34.2,
          "multilingual": 1.8,
          "instructionFollowing": null,
          "math": 10.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 97,
        "categoryRanks": {
          "agentic": 81,
          "coding": 72,
          "reasoning": 73,
          "multimodalGrounded": 97,
          "knowledge": 83,
          "multilingual": 98,
          "instructionFollowing": 122,
          "math": 78
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "deepseek-v3-1-reasoning",
      "canonicalModelKey": "deepseek-v3-1-reasoning",
      "model": "DeepSeek V3.1 (Reasoning)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 29,
      "rankingEligible": true,
      "overallRank": 98,
      "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
      "id": 155,
      "releaseDate": "2025-08-21",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v3-1",
        "familyName": "DeepSeek V3.1",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "deepseek-v3-1",
        "relatedModelKeys": [
          "deepseek-v3-1"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 29,
        "overallScore": 15,
        "rawOverallScore": 15,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 32.9,
          "coding": 21,
          "reasoning": 26.7,
          "multimodalGrounded": 6.2,
          "knowledge": 13.4,
          "multilingual": 11.2,
          "instructionFollowing": 14.1,
          "math": 7.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 98,
        "categoryRanks": {
          "agentic": 70,
          "coding": 81,
          "reasoning": 76,
          "multimodalGrounded": 101,
          "knowledge": 100,
          "multilingual": 86,
          "instructionFollowing": 116,
          "math": 82
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 18.85,
          "tau2Bench": 37.4,
          "gdpvalAaNormalized": 5.6,
          "gdpvalAa": 612
        },
        "coding": {
          "aaCodingIndex": 29.71,
          "terminalBenchHard": 25,
          "aaSciCode": 39.1
        },
        "reasoning": {
          "lcr": 53.3,
          "critpt": 2
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1168
        },
        "knowledge": {
          "artificialAnalysis": 27.71,
          "aaGpqaDiamond": 77.9,
          "aaHle": 13,
          "aaOmniscienceIndex": -28.4,
          "omniscienceAccuracy": 28.8,
          "omniscienceHallucinationRate": 80.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 41.5
        },
        "math": {}
      }
    },
    {
      "slug": "o1-pro",
      "canonicalModelKey": "o1-pro",
      "model": "o1-pro",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 28,
      "rankingEligible": true,
      "overallRank": 99,
      "url": "https://benchlm.ai/models/o1-pro",
      "markdownUrl": "https://benchlm.ai/md/models/o1-pro.md",
      "id": 141,
      "releaseDate": "2024-12-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "o1",
        "familyName": "o1",
        "variantType": "pro",
        "snapshotLabel": null,
        "baseFamilyModelKey": "o1",
        "relatedModelKeys": [
          "o1",
          "o1-preview"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 28,
        "overallScore": 26,
        "rawOverallScore": 26,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 16.2,
          "coding": 14.5,
          "reasoning": 33.5,
          "multimodalGrounded": 19.3,
          "knowledge": 60.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 99,
        "categoryRanks": {
          "agentic": 92,
          "reasoning": 67,
          "multimodalGrounded": 90,
          "multilingual": 100
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 13,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "gpqa": 79,
          "artificialAnalysis": 25.76
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "phi-4",
      "canonicalModelKey": "phi-4",
      "model": "Phi-4",
      "creator": "Microsoft",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "16K",
      "contextWindowTokens": 16000,
      "displayScore": 27,
      "rankingEligible": true,
      "overallRank": 100,
      "url": "https://benchlm.ai/models/phi-4",
      "markdownUrl": "https://benchlm.ai/md/models/phi-4.md",
      "id": 128,
      "releaseDate": "2025-01-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "phi-4",
        "familyName": "Phi-4",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "phi-4",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 27,
        "overallScore": 39,
        "rawOverallScore": 39,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 11.7,
          "coding": 48.1,
          "reasoning": null,
          "multimodalGrounded": 18.9,
          "knowledge": 32.8,
          "multilingual": 25.5,
          "instructionFollowing": null,
          "math": 78.1
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 100,
        "categoryRanks": {
          "agentic": 96,
          "reasoning": 91,
          "multimodalGrounded": 92,
          "multilingual": 82
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 17,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 0,
          "tau2Bench": 0
        },
        "coding": {
          "aaCodingIndex": 11.21,
          "terminalBenchHard": 3.8,
          "aaSciCode": 26
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 10.41,
          "aaGpqaDiamond": 57.5,
          "aaHle": 4.1,
          "aaOmniscienceIndex": -56.7,
          "omniscienceAccuracy": 13.2,
          "omniscienceHallucinationRate": 80.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 23.5
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-4-1-nano",
      "canonicalModelKey": "gpt-4-1-nano",
      "model": "GPT-4.1 nano",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 27,
      "rankingEligible": true,
      "overallRank": 101,
      "url": "https://benchlm.ai/models/gpt-4-1-nano",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-nano.md",
      "id": 121,
      "releaseDate": "2025-04-14",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-4-1",
        "familyName": "GPT-4.1",
        "variantType": "nano",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-4-1",
        "relatedModelKeys": [
          "gpt-4-1",
          "gpt-4-1-mini"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 27,
        "overallScore": 32,
        "rawOverallScore": 32,
        "verifiedDisplayScore": 60,
        "displayCategoryScores": {
          "agentic": 22.9,
          "coding": 8.7,
          "reasoning": 67.4,
          "multimodalGrounded": 37.9,
          "knowledge": 29.1,
          "multilingual": 5.6,
          "instructionFollowing": 59.3,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 50.3,
          "multilingual": null,
          "instructionFollowing": 83.2,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 101,
        "categoryRanks": {
          "agentic": 77,
          "reasoning": 28,
          "multimodalGrounded": 78,
          "multilingual": 92,
          "instructionFollowing": 70
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 3,
        "verifiedBenchmarkCount": 3,
        "rankableBenchmarkCount": 16,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 5.75,
          "tau2Bench": 17.3,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 318
        },
        "coding": {
          "aaCodingIndex": 11.17,
          "terminalBenchHard": 3.8,
          "aaSciCode": 25.9
        },
        "reasoning": {
          "lcr": 17,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 40.1,
          "designArenaWebsite": 1018
        },
        "knowledge": {
          "mmlu": 80.1,
          "gpqa": 50.3,
          "artificialAnalysis": 13.04,
          "aaGpqaDiamond": 51.2,
          "aaHle": 3.9,
          "aaOmniscienceIndex": -56.4,
          "omniscienceAccuracy": 13.3,
          "omniscienceHallucinationRate": 80.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 83.2,
          "aaIfBench": 32
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "glm-4-5",
      "canonicalModelKey": "glm-4-5",
      "model": "GLM-4.5",
      "creator": "Z.AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 25,
      "rankingEligible": true,
      "overallRank": 102,
      "url": "https://benchlm.ai/models/glm-4-5",
      "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
      "id": 164,
      "releaseDate": "2025-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-4-5",
        "familyName": "GLM-4.5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-4-5",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 25,
        "overallScore": 7,
        "rawOverallScore": 7,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 22.9,
          "coding": 7.8,
          "reasoning": 19.8,
          "multimodalGrounded": 5.2,
          "knowledge": 11.7,
          "multilingual": null,
          "instructionFollowing": 8.5,
          "math": 7.5
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 102,
        "categoryRanks": {
          "agentic": 78,
          "coding": 94,
          "reasoning": 83,
          "multimodalGrounded": 103,
          "knowledge": 101,
          "multilingual": 101,
          "instructionFollowing": 119,
          "math": 81
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {
          "designArenaWebsite": 1215
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "llama-4-scout",
      "canonicalModelKey": "llama-4-scout",
      "model": "Llama 4 Scout",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "10M",
      "contextWindowTokens": 10000000,
      "displayScore": 25,
      "rankingEligible": true,
      "overallRank": 103,
      "url": "https://benchlm.ai/models/llama-4-scout",
      "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
      "id": 151,
      "releaseDate": "2026-02-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "llama-4-scout",
        "familyName": "Llama 4 Scout",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "llama-4-scout",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 25,
        "overallScore": 23,
        "rawOverallScore": 23,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 17.2,
          "coding": null,
          "reasoning": 41.1,
          "multimodalGrounded": 36.4,
          "knowledge": 15.4,
          "multilingual": 4.7,
          "instructionFollowing": 18.8,
          "math": 15.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 103,
        "categoryRanks": {
          "agentic": 89,
          "coding": 101,
          "reasoning": 62,
          "multimodalGrounded": 79,
          "knowledge": 97,
          "multilingual": 93,
          "instructionFollowing": 113,
          "math": 76
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 35,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 5.17,
          "tau2Bench": 15.5,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 269
        },
        "coding": {
          "aaCodingIndex": 6.68,
          "terminalBenchHard": 1.5,
          "aaSciCode": 17
        },
        "reasoning": {
          "lcr": 25.8,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 52.9,
          "designArenaWebsite": 796
        },
        "knowledge": {
          "artificialAnalysis": 13.52,
          "aaGpqaDiamond": 58.7,
          "aaHle": 4.3,
          "aaOmniscienceIndex": -52.4,
          "omniscienceAccuracy": 14.6,
          "omniscienceHallucinationRate": 78.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 39.5
        },
        "math": {}
      }
    },
    {
      "slug": "nemotron-3-nano-30b",
      "canonicalModelKey": "nemotron-3-nano-30b",
      "model": "Nemotron 3 Nano 30B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 25,
      "rankingEligible": true,
      "overallRank": 104,
      "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
      "id": 145,
      "releaseDate": "2026-01-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "nemotron-3-nano-30b",
        "familyName": "Nemotron 3 Nano 30B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "nemotron-3-nano-30b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 25,
        "overallScore": 22,
        "rawOverallScore": 22,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 19.8,
          "coding": 16.8,
          "reasoning": 28.1,
          "multimodalGrounded": 12.5,
          "knowledge": 29.8,
          "multilingual": 38.3,
          "instructionFollowing": 40.3,
          "math": 37.1
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 104,
        "categoryRanks": {
          "agentic": 82,
          "coding": 84,
          "reasoning": 74,
          "multimodalGrounded": 95,
          "knowledge": 86,
          "multilingual": 71,
          "instructionFollowing": 95,
          "math": 63
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 8.48,
          "tau2Bench": 25.4,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 347
        },
        "coding": {
          "aaCodingIndex": 15.76,
          "terminalBenchHard": 12.1,
          "aaSciCode": 23
        },
        "reasoning": {
          "lcr": 6.7,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 13.17,
          "aaGpqaDiamond": 39.9,
          "aaHle": 4.6,
          "aaOmniscienceIndex": -69.2,
          "omniscienceAccuracy": 11.4,
          "omniscienceHallucinationRate": 90.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 37.5
        },
        "math": {}
      }
    },
    {
      "slug": "llama-3-70b",
      "canonicalModelKey": "llama-3-70b",
      "model": "Llama 3 70B",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 25,
      "rankingEligible": true,
      "overallRank": 105,
      "url": "https://benchlm.ai/models/llama-3-70b",
      "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
      "id": 143,
      "releaseDate": "2024-04-18",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "llama-3-70b",
        "familyName": "Llama 3 70B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "llama-3-70b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 25,
        "overallScore": 23,
        "rawOverallScore": 23,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 22,
          "coding": 2.6,
          "reasoning": 39.4,
          "multimodalGrounded": 25.8,
          "knowledge": 25.8,
          "multilingual": 26.2,
          "instructionFollowing": 36.6,
          "math": 37.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 105,
        "categoryRanks": {
          "agentic": 79,
          "coding": 98,
          "reasoning": 63,
          "multimodalGrounded": 86,
          "knowledge": 90,
          "multilingual": 81,
          "instructionFollowing": 102,
          "math": 62
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "deepseek-v3-1",
      "canonicalModelKey": "deepseek-v3-1",
      "model": "DeepSeek V3.1",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 24,
      "rankingEligible": true,
      "overallRank": 106,
      "url": "https://benchlm.ai/models/deepseek-v3-1",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
      "id": 166,
      "releaseDate": "2025-08-21",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v3-1",
        "familyName": "DeepSeek V3.1",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "deepseek-v3-1",
        "relatedModelKeys": [
          "deepseek-v3-1-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 24,
        "overallScore": 5,
        "rawOverallScore": 5,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 25.9,
          "coding": 15.9,
          "reasoning": 15.3,
          "multimodalGrounded": 3.7,
          "knowledge": 9.8,
          "multilingual": 7.5,
          "instructionFollowing": null,
          "math": 5.5
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 106,
        "categoryRanks": {
          "agentic": 74,
          "coding": 86,
          "reasoning": 85,
          "multimodalGrounded": 104,
          "knowledge": 103,
          "multilingual": 89,
          "instructionFollowing": 123,
          "math": 83
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 33,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 31.94,
          "tau2Bench": 34.8,
          "gdpvalAaNormalized": 28.7,
          "gdpvalAa": 1075
        },
        "coding": {
          "aaCodingIndex": 28.39,
          "terminalBenchHard": 24.2,
          "aaSciCode": 36.7
        },
        "reasoning": {
          "lcr": 45,
          "critpt": 0
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1168
        },
        "knowledge": {
          "artificialAnalysis": 28.13,
          "aaGpqaDiamond": 73.5,
          "aaHle": 6.3,
          "aaOmniscienceIndex": -41.1,
          "omniscienceAccuracy": 23.1,
          "omniscienceHallucinationRate": 83.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 37.8
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-4-turbo",
      "canonicalModelKey": "gpt-4-turbo",
      "model": "GPT-4 Turbo",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 24,
      "rankingEligible": true,
      "overallRank": 107,
      "url": "https://benchlm.ai/models/gpt-4-turbo",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
      "id": 135,
      "releaseDate": "2023-11-06",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-4-turbo",
        "familyName": "GPT-4 Turbo",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-4-turbo",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 24,
        "overallScore": 27,
        "rawOverallScore": 27,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 23.1,
          "coding": 12.6,
          "reasoning": 38.7,
          "multimodalGrounded": 31.2,
          "knowledge": 28.6,
          "multilingual": 22.4,
          "instructionFollowing": 47.6,
          "math": 39.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 107,
        "categoryRanks": {
          "agentic": 76,
          "coding": 89,
          "reasoning": 64,
          "multimodalGrounded": 84,
          "knowledge": 87,
          "multilingual": 84,
          "instructionFollowing": 85,
          "math": 61
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 30,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "aaCodingIndex": 21.49,
          "aaSciCode": 31.9
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 13.72,
          "aaHle": 3.3
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "gemini-1-0-pro",
      "canonicalModelKey": "gemini-1-0-pro",
      "model": "Gemini 1.0 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 24,
      "rankingEligible": true,
      "overallRank": 108,
      "url": "https://benchlm.ai/models/gemini-1-0-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
      "id": 146,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemini-1-0-pro",
        "familyName": "Gemini 1.0 Pro",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemini-1-0-pro",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 24,
        "overallScore": 30,
        "rawOverallScore": 30,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 14.6,
          "coding": null,
          "reasoning": 27.4,
          "multimodalGrounded": 55.2,
          "knowledge": 25,
          "multilingual": 24.4,
          "instructionFollowing": 36.6,
          "math": 42.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 108,
        "categoryRanks": {
          "agentic": 93,
          "reasoning": 75,
          "multimodalGrounded": 63,
          "knowledge": 94,
          "multilingual": 83,
          "instructionFollowing": 103,
          "math": 58
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 31,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "aaSciCode": 11.7
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 8.5,
          "aaGpqaDiamond": 27.7,
          "aaHle": 4.6
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "z-1",
      "canonicalModelKey": "z-1",
      "model": "Z-1",
      "creator": "Z",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 23,
      "rankingEligible": true,
      "overallRank": 109,
      "url": "https://benchlm.ai/models/z-1",
      "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
      "id": 138,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "z-1",
        "familyName": "Z-1",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "z-1",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 23,
        "overallScore": 27,
        "rawOverallScore": 27,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 19.3,
          "coding": 21.3,
          "reasoning": 29.4,
          "multimodalGrounded": 22.4,
          "knowledge": 26,
          "multilingual": 40.9,
          "instructionFollowing": 47.6,
          "math": 31
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 109,
        "categoryRanks": {
          "agentic": 83,
          "coding": 79,
          "reasoning": 72,
          "multimodalGrounded": 87,
          "knowledge": 89,
          "multilingual": 68,
          "instructionFollowing": 86,
          "math": 70
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "mistral-8x7b",
      "canonicalModelKey": "mistral-8x7b",
      "model": "Mistral 8x7B",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 23,
      "rankingEligible": true,
      "overallRank": 110,
      "url": "https://benchlm.ai/models/mistral-8x7b",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
      "id": 133,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-8x7b",
        "familyName": "Mistral 8x7B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mistral-8x7b",
        "relatedModelKeys": [
          "mistral-8x7b-v0-2"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 23,
        "overallScore": 28,
        "rawOverallScore": 28,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 17.2,
          "coding": 18.8,
          "reasoning": 32.2,
          "multimodalGrounded": 18.2,
          "knowledge": 35.1,
          "multilingual": 39.1,
          "instructionFollowing": 40.3,
          "math": 46.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 110,
        "categoryRanks": {
          "agentic": 90,
          "coding": 83,
          "reasoning": 69,
          "multimodalGrounded": 93,
          "knowledge": 81,
          "multilingual": 70,
          "instructionFollowing": 96,
          "math": 54
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "claude-3-haiku",
      "canonicalModelKey": "claude-3-haiku",
      "model": "Claude 3 Haiku",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 23,
      "rankingEligible": true,
      "overallRank": 111,
      "url": "https://benchlm.ai/models/claude-3-haiku",
      "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
      "id": 130,
      "releaseDate": "2024-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-3-haiku",
        "familyName": "Claude 3 Haiku",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-3-haiku",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 23,
        "overallScore": 30,
        "rawOverallScore": 30,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 18.3,
          "coding": 7.7,
          "reasoning": 37.4,
          "multimodalGrounded": 55.8,
          "knowledge": 25.4,
          "multilingual": 36.3,
          "instructionFollowing": 33,
          "math": 34.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 111,
        "categoryRanks": {
          "agentic": 86,
          "coding": 95,
          "reasoning": 65,
          "multimodalGrounded": 62,
          "knowledge": 92,
          "multilingual": 72,
          "instructionFollowing": 108,
          "math": 66
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 7.02,
          "tau2Bench": 21.1,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 378
        },
        "coding": {
          "aaCodingIndex": 6.72,
          "terminalBenchHard": 0.8,
          "aaSciCode": 18.6
        },
        "reasoning": {
          "lcr": 21,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 30.8
        },
        "knowledge": {
          "artificialAnalysis": 12.26,
          "aaGpqaDiamond": 37.4,
          "aaHle": 3.9,
          "aaOmniscienceIndex": -47.6,
          "omniscienceAccuracy": 17.2,
          "omniscienceHallucinationRate": 78.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 36.1
        },
        "math": {}
      }
    },
    {
      "slug": "mixtral-8x22b-instruct-v0-1",
      "canonicalModelKey": "mixtral-8x22b-instruct-v0-1",
      "model": "Mixtral 8x22B Instruct v0.1",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 22,
      "rankingEligible": true,
      "overallRank": 112,
      "url": "https://benchlm.ai/models/mixtral-8x22b-instruct-v0-1",
      "markdownUrl": "https://benchlm.ai/md/models/mixtral-8x22b-instruct-v0-1.md",
      "id": 158,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mixtral-8x22b",
        "familyName": "Mixtral 8x22B",
        "variantType": "instruct",
        "snapshotLabel": "v0.1",
        "baseFamilyModelKey": "mixtral-8x22b-instruct-v0-1",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 22,
        "overallScore": 25,
        "rawOverallScore": 25,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 7.6,
          "coding": 36.4,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 30.1,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 112,
        "categoryRanks": {
          "agentic": 99,
          "reasoning": 92,
          "multimodalGrounded": 108,
          "multilingual": 102
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": false,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 13,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "nemotron-4-15b",
      "canonicalModelKey": "nemotron-4-15b",
      "model": "Nemotron-4 15B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 22,
      "rankingEligible": true,
      "overallRank": 113,
      "url": "https://benchlm.ai/models/nemotron-4-15b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
      "id": 140,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "nemotron-4-15b",
        "familyName": "Nemotron-4 15B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "nemotron-4-15b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 22,
        "overallScore": 25,
        "rawOverallScore": 25,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 17.7,
          "coding": 20.7,
          "reasoning": 24.4,
          "multimodalGrounded": 20.9,
          "knowledge": 25.7,
          "multilingual": 40.1,
          "instructionFollowing": 44,
          "math": 32.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 113,
        "categoryRanks": {
          "agentic": 88,
          "coding": 82,
          "reasoning": 79,
          "multimodalGrounded": 89,
          "knowledge": 91,
          "multilingual": 69,
          "instructionFollowing": 92,
          "math": 68
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "moonshot-v1",
      "canonicalModelKey": "moonshot-v1",
      "model": "Moonshot v1",
      "creator": "Moonshot AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 22,
      "rankingEligible": true,
      "overallRank": 114,
      "url": "https://benchlm.ai/models/moonshot-v1",
      "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
      "id": 137,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "moonshot",
        "familyName": "Moonshot",
        "variantType": "snapshot",
        "snapshotLabel": "v1",
        "baseFamilyModelKey": "moonshot-v1",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 22,
        "overallScore": 27,
        "rawOverallScore": 27,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 18.6,
          "coding": 21.2,
          "reasoning": 30.1,
          "multimodalGrounded": 26.3,
          "knowledge": 25.2,
          "multilingual": 32.7,
          "instructionFollowing": 36.6,
          "math": 31.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 114,
        "categoryRanks": {
          "agentic": 84,
          "coding": 80,
          "reasoning": 71,
          "multimodalGrounded": 85,
          "knowledge": 93,
          "multilingual": 79,
          "instructionFollowing": 104,
          "math": 69
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "nemotron-ultra-253b",
      "canonicalModelKey": "nemotron-ultra-253b",
      "model": "Nemotron Ultra 253B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 22,
      "rankingEligible": true,
      "overallRank": 115,
      "url": "https://benchlm.ai/models/nemotron-ultra-253b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
      "id": 132,
      "releaseDate": "2026-02-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "nemotron-ultra-253b",
        "familyName": "Nemotron Ultra 253B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "nemotron-ultra-253b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 22,
        "overallScore": 28,
        "rawOverallScore": 28,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 18.6,
          "coding": 26.9,
          "reasoning": 26.6,
          "multimodalGrounded": 11.4,
          "knowledge": 24.9,
          "multilingual": 33.6,
          "instructionFollowing": 40.3,
          "math": 27.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 115,
        "categoryRanks": {
          "agentic": 85,
          "coding": 73,
          "reasoning": 77,
          "multimodalGrounded": 96,
          "knowledge": 95,
          "multilingual": 76,
          "instructionFollowing": 97,
          "math": 71
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 3.8,
          "tau2Bench": 11.4,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 238
        },
        "coding": {
          "aaCodingIndex": 13.09,
          "terminalBenchHard": 2.3,
          "aaSciCode": 34.7
        },
        "reasoning": {
          "lcr": 7.3,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 15.02,
          "aaGpqaDiamond": 72.8,
          "aaHle": 8.1,
          "aaOmniscienceIndex": -45.5,
          "omniscienceAccuracy": 19.9,
          "omniscienceHallucinationRate": 81.7
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 38.2
        },
        "math": {}
      }
    },
    {
      "slug": "glm-4-5-air",
      "canonicalModelKey": "glm-4-5-air",
      "model": "GLM-4.5-Air",
      "creator": "Z.AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 18,
      "rankingEligible": true,
      "overallRank": 116,
      "url": "https://benchlm.ai/models/glm-4-5-air",
      "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
      "id": 163,
      "releaseDate": "2025-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-4-5-air",
        "familyName": "GLM-4.5-Air",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-4-5-air",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 18,
        "overallScore": 4,
        "rawOverallScore": 4,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 17.8,
          "coding": 12.5,
          "reasoning": 14.8,
          "multimodalGrounded": 3,
          "knowledge": 8.9,
          "multilingual": 2.9,
          "instructionFollowing": 5.4,
          "math": 3.6
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 116,
        "categoryRanks": {
          "agentic": 87,
          "coding": 90,
          "reasoning": 86,
          "multimodalGrounded": 105,
          "knowledge": 104,
          "multilingual": 96,
          "instructionFollowing": 120,
          "math": 84
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 35,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 21.01,
          "tau2Bench": 46.5,
          "gdpvalAaNormalized": 3,
          "gdpvalAa": 560
        },
        "coding": {
          "aaCodingIndex": 23.82,
          "terminalBenchHard": 20.5,
          "aaSciCode": 30.6
        },
        "reasoning": {
          "lcr": 43.7,
          "critpt": 0
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1192
        },
        "knowledge": {
          "artificialAnalysis": 23.17,
          "aaGpqaDiamond": 73.3,
          "aaHle": 6.8,
          "aaOmniscienceIndex": -62.5,
          "omniscienceAccuracy": 15.5,
          "omniscienceHallucinationRate": 92.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 37.6
        },
        "math": {}
      }
    },
    {
      "slug": "llama-4-maverick",
      "canonicalModelKey": "llama-4-maverick",
      "model": "Llama 4 Maverick",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 17,
      "rankingEligible": true,
      "overallRank": 117,
      "url": "https://benchlm.ai/models/llama-4-maverick",
      "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
      "id": 150,
      "releaseDate": "2026-02-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "llama-4-maverick",
        "familyName": "Llama 4 Maverick",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "llama-4-maverick",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 17,
        "overallScore": 18,
        "rawOverallScore": 18,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 12.6,
          "coding": 8,
          "reasoning": 32.7,
          "multimodalGrounded": 34.6,
          "knowledge": 13.7,
          "multilingual": 4.7,
          "instructionFollowing": 22.7,
          "math": 10.3
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 117,
        "categoryRanks": {
          "agentic": 95,
          "coding": 93,
          "reasoning": 68,
          "multimodalGrounded": 81,
          "knowledge": 99,
          "multilingual": 94,
          "instructionFollowing": 112,
          "math": 79
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 38,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 7.22,
          "tau2Bench": 17.8,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 436
        },
        "coding": {
          "aaCodingIndex": 15.58,
          "terminalBenchHard": 6.8,
          "aaSciCode": 33.1
        },
        "reasoning": {
          "lcr": 46,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 62.1,
          "designArenaWebsite": 916
        },
        "knowledge": {
          "artificialAnalysis": 18.36,
          "aaGpqaDiamond": 67.1,
          "aaHle": 4.8,
          "aaOmniscienceIndex": -41.8,
          "omniscienceAccuracy": 24.3,
          "omniscienceHallucinationRate": 87.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 43
        },
        "math": {}
      }
    },
    {
      "slug": "gemma-3-27b",
      "canonicalModelKey": "gemma-3-27b",
      "model": "Gemma 3 27B",
      "creator": "Google",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 16,
      "rankingEligible": true,
      "overallRank": 118,
      "url": "https://benchlm.ai/models/gemma-3-27b",
      "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
      "id": 165,
      "releaseDate": "2025-03-12",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemma-3-27b",
        "familyName": "Gemma 3 27B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemma-3-27b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 16,
        "overallScore": 9,
        "rawOverallScore": 9,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 12.8,
          "coding": 5.5,
          "reasoning": 19.9,
          "multimodalGrounded": 6.9,
          "knowledge": 16.6,
          "multilingual": 11.1,
          "instructionFollowing": null,
          "math": 13.4
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 118,
        "categoryRanks": {
          "agentic": 94,
          "coding": 96,
          "reasoning": 82,
          "multimodalGrounded": 99,
          "knowledge": 96,
          "multilingual": 87,
          "instructionFollowing": 124,
          "math": 77
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 30,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 3.51,
          "tau2Bench": 10.5,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 283
        },
        "coding": {
          "aaCodingIndex": 9.59,
          "terminalBenchHard": 3.8,
          "aaSciCode": 21.2
        },
        "reasoning": {
          "lcr": 5.7,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 48
        },
        "knowledge": {
          "artificialAnalysis": 10.31,
          "aaGpqaDiamond": 42.8,
          "aaHle": 4.7,
          "aaOmniscienceIndex": -65.9,
          "omniscienceAccuracy": 12.5,
          "omniscienceHallucinationRate": 89.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 31.8
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-oss-20b",
      "canonicalModelKey": "gpt-oss-20b",
      "model": "GPT-OSS 20B",
      "creator": "OpenAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 16,
      "rankingEligible": true,
      "overallRank": 119,
      "url": "https://benchlm.ai/models/gpt-oss-20b",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
      "id": 167,
      "releaseDate": "2025-08-05",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-oss",
        "familyName": "GPT-OSS",
        "variantType": "mini",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-oss-120b",
        "relatedModelKeys": [
          "gpt-oss-120b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 16,
        "overallScore": 5,
        "rawOverallScore": 5,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 21.3,
          "coding": 9.3,
          "reasoning": 11.7,
          "multimodalGrounded": 1.5,
          "knowledge": 5.9,
          "multilingual": 4.6,
          "instructionFollowing": null,
          "math": 1.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 119,
        "categoryRanks": {
          "agentic": 80,
          "coding": 91,
          "reasoning": 88,
          "multimodalGrounded": 106,
          "knowledge": 105,
          "multilingual": 95,
          "instructionFollowing": 125,
          "math": 85
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 34,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 27.6,
          "apexAgentsAa": 0.7,
          "tau2Bench": 60.2,
          "gdpvalAaNormalized": 7.4,
          "gdpvalAa": 647
        },
        "coding": {
          "reactNativeEvals": 71,
          "aaCodingIndex": 18.53,
          "terminalBenchHard": 10.6,
          "aaSciCode": 34.4
        },
        "reasoning": {
          "lcr": 30.7,
          "critpt": 1.4
        },
        "multimodalGrounded": {
          "designArenaWebsite": 898
        },
        "knowledge": {
          "artificialAnalysis": 24.47,
          "aaGpqaDiamond": 68.8,
          "aaHle": 9.8,
          "aaOmniscienceIndex": -63.9,
          "omniscienceAccuracy": 15.5,
          "omniscienceHallucinationRate": 94.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 65.1
        },
        "math": {}
      }
    },
    {
      "slug": "llama-4-behemoth",
      "canonicalModelKey": "llama-4-behemoth",
      "model": "Llama 4 Behemoth",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 11,
      "rankingEligible": true,
      "overallRank": 120,
      "url": "https://benchlm.ai/models/llama-4-behemoth",
      "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
      "id": 156,
      "releaseDate": "2026-02-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "llama-4-behemoth",
        "familyName": "Llama 4 Behemoth",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "llama-4-behemoth",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 11,
        "overallScore": 15,
        "rawOverallScore": 15,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 8.6,
          "coding": 3.9,
          "reasoning": 14.2,
          "multimodalGrounded": 31.9,
          "knowledge": 14.3,
          "multilingual": 13.1,
          "instructionFollowing": 26.5,
          "math": 18.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 120,
        "categoryRanks": {
          "agentic": 97,
          "coding": 97,
          "reasoning": 87,
          "multimodalGrounded": 83,
          "knowledge": 98,
          "multilingual": 85,
          "instructionFollowing": 111,
          "math": 75
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 37,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "nova-pro",
      "canonicalModelKey": "nova-pro",
      "model": "Nova Pro",
      "creator": "Amazon",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 10,
      "rankingEligible": true,
      "overallRank": 121,
      "url": "https://benchlm.ai/models/nova-pro",
      "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
      "id": 162,
      "releaseDate": "2025-04-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "nova-pro",
        "familyName": "Nova Pro",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "nova-pro",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 10,
        "overallScore": 9,
        "rawOverallScore": 9,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 4.8,
          "coding": 8.6,
          "reasoning": 18,
          "multimodalGrounded": 5.5,
          "knowledge": 10.3,
          "multilingual": 8.4,
          "instructionFollowing": null,
          "math": 9.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 121,
        "categoryRanks": {
          "agentic": 100,
          "coding": 92,
          "reasoning": 84,
          "multimodalGrounded": 102,
          "knowledge": 102,
          "multilingual": 88,
          "instructionFollowing": 126,
          "math": 80
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 31,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 4.68,
          "tau2Bench": 14,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 386
        },
        "coding": {
          "aaCodingIndex": 10.98,
          "terminalBenchHard": 6.1,
          "aaSciCode": 20.8
        },
        "reasoning": {
          "lcr": 19,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 44.3
        },
        "knowledge": {
          "artificialAnalysis": 13.48,
          "aaGpqaDiamond": 49.9,
          "aaHle": 3.4,
          "aaOmniscienceIndex": -47.6,
          "omniscienceAccuracy": 17,
          "omniscienceHallucinationRate": 77.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 38.1
        },
        "math": {}
      }
    },
    {
      "slug": "mistral-7b-v0-3",
      "canonicalModelKey": "mistral-7b-v0-3",
      "model": "Mistral 7B v0.3",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 4,
      "rankingEligible": true,
      "overallRank": 122,
      "url": "https://benchlm.ai/models/mistral-7b-v0-3",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
      "id": 169,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-7b",
        "familyName": "Mistral 7B",
        "variantType": "snapshot",
        "snapshotLabel": "v0.3",
        "baseFamilyModelKey": "mistral-7b-v0-3",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 4,
        "overallScore": 3,
        "rawOverallScore": 3,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 0.7,
          "reasoning": 2.7,
          "multimodalGrounded": null,
          "knowledge": 4.9,
          "multilingual": 7.4,
          "instructionFollowing": 3.7,
          "math": 1.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 122,
        "categoryRanks": {
          "agentic": 101,
          "coding": 100,
          "reasoning": 89,
          "multimodalGrounded": 109,
          "knowledge": 106,
          "multilingual": 90,
          "instructionFollowing": 121,
          "math": 86
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "mistral-8x7b-v0-2",
      "canonicalModelKey": "mistral-8x7b-v0-2",
      "model": "Mistral 8x7B v0.2",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 1,
      "rankingEligible": true,
      "overallRank": 123,
      "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
      "id": 170,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-8x7b",
        "familyName": "Mistral 8x7B",
        "variantType": "snapshot",
        "snapshotLabel": "v0.2",
        "baseFamilyModelKey": "mistral-8x7b",
        "relatedModelKeys": [
          "mistral-8x7b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 1,
        "overallScore": 2,
        "rawOverallScore": 2,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 1,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 2.4,
          "multilingual": 1.9,
          "instructionFollowing": null,
          "math": 1.1
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": true,
        "verifiedRankingEligible": false,
        "overallRank": 123,
        "categoryRanks": {
          "agentic": 102,
          "coding": 99,
          "reasoning": 93,
          "multimodalGrounded": 110,
          "knowledge": 107,
          "multilingual": 97,
          "instructionFollowing": 127,
          "math": 87
        },
        "categoryRankingEligible": {
          "agentic": true,
          "coding": true,
          "reasoning": true,
          "multimodalGrounded": true,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 32,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-5-pro",
      "canonicalModelKey": "gpt-5-5-pro",
      "model": "GPT-5.5 Pro",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 100,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-5-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-5-pro.md",
      "id": 25,
      "releaseDate": "2026-04-23",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-5",
        "familyName": "GPT-5.5",
        "variantType": "pro",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-5",
        "relatedModelKeys": [
          "gpt-5-5"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 100,
        "overallScore": 100,
        "rawOverallScore": 100,
        "verifiedDisplayScore": 75,
        "displayCategoryScores": {
          "agentic": 100,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 100,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 100
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 90.1,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 57.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 52.4
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 4,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "browseComp": 90.1
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "hle": 57.2,
          "hleNoTools": 43.1
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {
          "frontierMath": 52.4
        }
      }
    },
    {
      "slug": "holo3-35b-a3b",
      "canonicalModelKey": "holo3-35b-a3b",
      "model": "Holo3-35B-A3B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 100,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-35b-a3b",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-35b-a3b.md",
      "id": 3,
      "releaseDate": "2026-03-31",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3",
        "familyName": "Holo3",
        "variantType": "35b-a3b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-122b-a10b",
        "relatedModelKeys": [
          "holo3-122b-a10b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "holo2-30b-a3b"
      },
      "scores": {
        "displayScore": 100,
        "overallScore": 100,
        "rawOverallScore": 100,
        "verifiedDisplayScore": 83,
        "displayCategoryScores": {
          "agentic": 100,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 82.6,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "osWorldVerified": 82.56
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo3-122b-a10b",
      "canonicalModelKey": "holo3-122b-a10b",
      "model": "Holo3-122B-A10B",
      "creator": "H Company",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 94,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-122b-a10b",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-122b-a10b.md",
      "id": 13,
      "releaseDate": "2026-03-31",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3",
        "familyName": "Holo3",
        "variantType": "122b-a10b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-122b-a10b",
        "relatedModelKeys": [
          "holo3-35b-a3b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "holo2-235b-a22b"
      },
      "scores": {
        "displayScore": 94,
        "overallScore": 94,
        "rawOverallScore": 94,
        "verifiedDisplayScore": 79,
        "displayCategoryScores": {
          "agentic": 93.9,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 78.9,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "osWorldVerified": 78.85
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "mimo-v2-5-pro",
      "canonicalModelKey": "mimo-v2-5-pro",
      "model": "MiMo-V2.5-Pro",
      "creator": "Xiaomi",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 85,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mimo-v2-5-pro",
      "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-5-pro.md",
      "id": 88,
      "releaseDate": "2026-04-22",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mimo-v2-5",
        "familyName": "MiMo-V2.5",
        "variantType": "pro",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mimo-v2-5",
        "relatedModelKeys": [
          "mimo-v2-5",
          "mimo-v2-pro"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "mimo-v2-pro"
      },
      "scores": {
        "displayScore": 85,
        "overallScore": 80,
        "rawOverallScore": 80,
        "verifiedDisplayScore": 60,
        "displayCategoryScores": {
          "agentic": 85.5,
          "coding": 80.1,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 87.5,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 68.4,
          "coding": 57.2,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 48,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 7,
        "verifiedBenchmarkCount": 7,
        "rankableBenchmarkCount": 8,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 63.8,
          "gdpvalAa": 1571,
          "tau3Bench": 72.9,
          "terminalBench2": 68.4,
          "aaAgenticIndex": 67.44,
          "tau2Bench": 94.2,
          "gdpvalAaNormalized": 53.6,
          "apexAgentsAa": 2.4,
          "gertLabs": 62.7
        },
        "coding": {
          "swePro": 57.2,
          "terminalBench2": 68.4,
          "aaCodingIndex": 45.53,
          "terminalBenchHard": 43.2,
          "aaSciCode": 50.2
        },
        "reasoning": {
          "lcr": 73.3,
          "critpt": 4
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1312
        },
        "knowledge": {
          "hle": 48,
          "hleNoTools": 34,
          "artificialAnalysis": 53.83,
          "aaGpqaDiamond": 86.6,
          "aaHle": 33.8,
          "aaOmniscienceIndex": 3.6,
          "omniscienceAccuracy": 22.6,
          "omniscienceHallucinationRate": 24.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 79.9
        },
        "math": {}
      }
    },
    {
      "slug": "mimo-v2-pro",
      "canonicalModelKey": "mimo-v2-pro",
      "model": "MiMo-V2-Pro",
      "creator": "Xiaomi",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 84,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mimo-v2-pro",
      "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-pro.md",
      "id": 6,
      "releaseDate": "2026-03-18",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mimo-v2-pro",
        "familyName": "MiMo-V2-Pro",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mimo-v2-pro",
        "relatedModelKeys": [
          "mimo-v2-flash",
          "mimo-v2-omni"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 84,
        "overallScore": 84,
        "rawOverallScore": 84,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 77.5,
          "coding": 95.7,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 87.8,
          "multilingual": null,
          "instructionFollowing": 52.5,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 82
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 7,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 57.8,
          "aaAgenticIndex": 62.8,
          "tau2Bench": 95,
          "gdpvalAaNormalized": 45.3,
          "gdpvalAa": 1405,
          "gertLabs": 36.68
        },
        "coding": {
          "sweVerified": 78,
          "aaCodingIndex": 41.43,
          "terminalBenchHard": 40.9,
          "aaSciCode": 42.5
        },
        "reasoning": {
          "lcr": 60.7,
          "critpt": 0.3
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 49.2,
          "aaGpqaDiamond": 87,
          "aaHle": 28.3,
          "aaOmniscienceIndex": 4.9,
          "omniscienceAccuracy": 26.8,
          "omniscienceHallucinationRate": 29.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 68.8
        },
        "math": {}
      }
    },
    {
      "slug": "mimo-v2-omni",
      "canonicalModelKey": "mimo-v2-omni",
      "model": "MiMo-V2-Omni",
      "creator": "Xiaomi",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 84,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mimo-v2-omni",
      "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-omni.md",
      "id": 24,
      "releaseDate": "2026-03-18",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mimo-v2-omni",
        "familyName": "MiMo-V2-Omni",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mimo-v2-omni",
        "relatedModelKeys": [
          "mimo-v2-flash",
          "mimo-v2-pro"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 84,
        "overallScore": 84,
        "rawOverallScore": 84,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 91,
          "reasoning": null,
          "multimodalGrounded": 71.6,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 45.2,
          "aaAgenticIndex": 58.56,
          "tau2Bench": 91.2,
          "gdpvalAaNormalized": 40.9,
          "gdpvalAa": 1317
        },
        "coding": {
          "sweVerified": 74.8,
          "aaCodingIndex": 35.46,
          "terminalBenchHard": 34.8,
          "aaSciCode": 36.7
        },
        "reasoning": {
          "lcr": 66.7,
          "critpt": 1.1
        },
        "multimodalGrounded": {
          "aaMmmuPro": 69.9
        },
        "knowledge": {
          "artificialAnalysis": 43.4,
          "aaGpqaDiamond": 82.8,
          "aaHle": 19.9,
          "aaOmniscienceIndex": -17.4,
          "omniscienceAccuracy": 18.7,
          "omniscienceHallucinationRate": 44.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 53.5
        },
        "math": {}
      }
    },
    {
      "slug": "composer-2-5",
      "canonicalModelKey": "composer-2-5",
      "model": "Composer 2.5",
      "creator": "Cursor",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 81,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/composer-2-5",
      "markdownUrl": "https://benchlm.ai/md/models/composer-2-5.md",
      "id": 46,
      "releaseDate": "2026-05-18",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "composer",
        "familyName": "Composer",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "composer-2-5",
        "relatedModelKeys": [
          "composer-2",
          "composer-2-fast"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "composer-2"
      },
      "scores": {
        "displayScore": 81,
        "overallScore": 81,
        "rawOverallScore": 81,
        "verifiedDisplayScore": 69,
        "displayCategoryScores": {
          "agentic": 81.5,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 69.3,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 4,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 69.3
        },
        "coding": {
          "terminalBench2": 69.3,
          "sweMultilingual": 79.8,
          "cursorBench31": 63.2
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "muse-spark",
      "canonicalModelKey": "muse-spark",
      "model": "Muse Spark",
      "creator": "Meta",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 80,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/muse-spark",
      "markdownUrl": "https://benchlm.ai/md/models/muse-spark.md",
      "id": 103,
      "releaseDate": "2026-04-08",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "muse-spark",
        "familyName": "Muse Spark",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "muse-spark",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 80,
        "overallScore": 69,
        "rawOverallScore": 69,
        "verifiedDisplayScore": 58,
        "displayCategoryScores": {
          "agentic": 74,
          "coding": 82.7,
          "reasoning": 57,
          "multimodalGrounded": 78.2,
          "knowledge": 92.5,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 59,
          "coding": 61.7,
          "reasoning": 42.5,
          "multimodalGrounded": 82.2,
          "knowledge": 50.4,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "multimodalGrounded": 19
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 21,
        "verifiedBenchmarkCount": 21,
        "rankableBenchmarkCount": 21,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 59,
          "tau2Bench": 91.5,
          "deepSearchQa": 74.8,
          "cyberGym": 43.5,
          "clawEval": 63.8,
          "aaAgenticIndex": 61.99,
          "gdpvalAaNormalized": 45.9,
          "gdpvalAa": 1417
        },
        "coding": {
          "sweVerified": 77.4,
          "swePro": 52.4,
          "liveCodeBenchPro": 80,
          "vibeCodeBench": 19.674,
          "aaCodingIndex": 47.47,
          "terminalBenchHard": 45.5,
          "aaSciCode": 51.5
        },
        "reasoning": {
          "arcAgi2": 42.5,
          "lcr": 69.7,
          "critpt": 11.3
        },
        "multimodalGrounded": {
          "charxiv": 86.4,
          "mmmuPro": 80.4,
          "erqa": 64.7,
          "simpleVqa": 71.3,
          "screenSpotPro": 84.1,
          "zeroBench": 33,
          "medXpertQaMm": 78.4,
          "gdpvalAa": 1444,
          "aaMmmuPro": 80.5
        },
        "knowledge": {
          "gpqaDiamond": 89.5,
          "hle": 50.4,
          "hleNoTools": 42.8,
          "healthBenchHard": 42.8,
          "medXpertQaText": 52.6,
          "artificialAnalysis": 52.15,
          "aaGpqaDiamond": 88.4,
          "aaHle": 39.9,
          "aaOmniscienceIndex": 4.1,
          "omniscienceAccuracy": 44.6,
          "omniscienceHallucinationRate": 73.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 75.9
        },
        "math": {}
      }
    },
    {
      "slug": "qwen3-6-max-preview",
      "canonicalModelKey": "qwen3-6-max-preview",
      "model": "Qwen 3.6 Max (preview)",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 78,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/qwen3-6-max-preview",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-max-preview.md",
      "id": 75,
      "releaseDate": "2026-04-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-6-max",
        "familyName": "Qwen 3.6 Max",
        "variantType": "preview",
        "snapshotLabel": "preview",
        "baseFamilyModelKey": "qwen3-6-max-preview",
        "relatedModelKeys": [
          "qwen3-6-plus",
          "qwen3-5-plus"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 78,
        "overallScore": 72,
        "rawOverallScore": 72,
        "verifiedDisplayScore": 63,
        "displayCategoryScores": {
          "agentic": 79.9,
          "coding": 75.2,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 69.3,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 65.4,
          "coding": 54.1,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 73.9,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 8,
        "verifiedBenchmarkCount": 8,
        "rankableBenchmarkCount": 8,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 65.4,
          "qwenClawBench": 59,
          "qwenWebBench": 1532,
          "aaAgenticIndex": 64.83,
          "tau2Bench": 95.9,
          "gdpvalAaNormalized": 50.2,
          "gdpvalAa": 1504
        },
        "coding": {
          "swePro": 57.3,
          "sciCode": 47,
          "nl2Repo": 42.9,
          "terminalBench2": 65.4,
          "aaCodingIndex": 44.92,
          "terminalBenchHard": 43.9,
          "aaSciCode": 46.9
        },
        "reasoning": {
          "lcr": 69.7,
          "critpt": 3.7
        },
        "multimodalGrounded": {},
        "knowledge": {
          "superGpqa": 73.9,
          "artificialAnalysis": 51.81,
          "aaGpqaDiamond": 88.8,
          "aaHle": 28.9,
          "aaOmniscienceIndex": 10.2,
          "omniscienceAccuracy": 37.7,
          "omniscienceHallucinationRate": 44.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 76.6
        },
        "math": {}
      }
    },
    {
      "slug": "mistral-medium-3-5-128b",
      "canonicalModelKey": "mistral-medium-3-5-128b",
      "model": "Mistral Medium 3.5 128B",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 78,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mistral-medium-3-5-128b",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-medium-3-5-128b.md",
      "id": 17,
      "releaseDate": "2026-04-29",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-medium-3-5",
        "familyName": "Mistral Medium 3.5",
        "variantType": "128b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mistral-medium-3-5-128b",
        "relatedModelKeys": [
          "mistral-medium-3"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 78,
        "overallScore": 95,
        "rawOverallScore": 95,
        "verifiedDisplayScore": 78,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 84.4,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 77.6,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 2,
        "verifiedBenchmarkCount": 2,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau3Bench": 91.4,
          "aaAgenticIndex": 53.16,
          "tau2Bench": 94.2,
          "gdpvalAaNormalized": 33.4,
          "gdpvalAa": 1168,
          "gertLabs": 39.1
        },
        "coding": {
          "sweVerified": 77.6,
          "aaCodingIndex": 35.42,
          "terminalBenchHard": 33.3,
          "aaSciCode": 39.6
        },
        "reasoning": {
          "lcr": 61,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 64.9
        },
        "knowledge": {
          "artificialAnalysis": 39.23,
          "aaGpqaDiamond": 74.8,
          "aaHle": 12.8,
          "aaOmniscienceIndex": -36.3,
          "omniscienceAccuracy": 25.1,
          "omniscienceHallucinationRate": 82
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 68.8
        },
        "math": {}
      }
    },
    {
      "slug": "interfaze-beta",
      "canonicalModelKey": "interfaze-beta",
      "model": "Interfaze Beta",
      "creator": "Interfaze",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 77,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/interfaze-beta",
      "markdownUrl": "https://benchlm.ai/md/models/interfaze-beta.md",
      "id": 10,
      "releaseDate": "2026-05-11",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "interfaze",
        "familyName": "Interfaze",
        "variantType": "beta",
        "snapshotLabel": "beta",
        "baseFamilyModelKey": "interfaze-beta",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 77,
        "overallScore": 77,
        "rawOverallScore": 77,
        "verifiedDisplayScore": 81,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 61.6,
          "knowledge": 92.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 71.1,
          "knowledge": 89.9,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 10,
        "verifiedBenchmarkCount": 10,
        "rankableBenchmarkCount": 10,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "spider2Lite": 52.9
        },
        "reasoning": {},
        "multimodalGrounded": {
          "ocrBenchV2": 70.7,
          "olmOcr": 85.7,
          "refcocoAvg": 82.1,
          "voxPopuliWer": 2.4,
          "mmmuPro": 71.1
        },
        "knowledge": {
          "gpqa": 89.9,
          "gpqaDiamond": 89.9,
          "mmmlu": 90.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "sobValueAcc": 79.5
        },
        "math": {}
      }
    },
    {
      "slug": "grok-4-3",
      "canonicalModelKey": "grok-4-3",
      "model": "Grok 4.3",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 74,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/grok-4-3",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-3.md",
      "id": 87,
      "releaseDate": "2026-04-30",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-4-3",
        "familyName": "Grok 4.3",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-4-3",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 74,
        "overallScore": 71,
        "rawOverallScore": 71,
        "verifiedDisplayScore": 60,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 64.3,
          "reasoning": null,
          "multimodalGrounded": 73.9,
          "knowledge": 73.6,
          "multilingual": null,
          "instructionFollowing": 86.9,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 47.3,
          "reasoning": null,
          "multimodalGrounded": 78.1,
          "knowledge": 53.9,
          "multilingual": null,
          "instructionFollowing": 81.3,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 23
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 6,
        "verifiedBenchmarkCount": 6,
        "rankableBenchmarkCount": 6,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau2Bench": 97.7,
          "gdpvalAaNormalized": 49.8,
          "aaAgenticIndex": 65.89,
          "apexAgentsAa": 17,
          "gdpvalAa": 1495,
          "gertLabs": 43.86
        },
        "coding": {
          "sciCode": 47.3,
          "terminalBenchHard": 37.9,
          "aaCodingIndex": 41.03,
          "aaSciCode": 47.3
        },
        "reasoning": {
          "lcr": 64.3,
          "critpt": 8
        },
        "multimodalGrounded": {
          "mmmuPro": 78.1,
          "designArenaWebsite": 1252,
          "aaMmmuPro": 78.1
        },
        "knowledge": {
          "artificialAnalysis": 53.2,
          "gpqa": 90.1,
          "hle": 35,
          "omniscienceAccuracy": 34.6,
          "omniscienceHallucinationRate": 25,
          "aaGpqaDiamond": 90.1,
          "aaHle": 35,
          "aaOmniscienceIndex": 18.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifBench": 81.3,
          "aaIfBench": 81.3
        },
        "math": {}
      }
    },
    {
      "slug": "composer-2",
      "canonicalModelKey": "composer-2",
      "model": "Composer 2",
      "creator": "Cursor",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 73,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/composer-2",
      "markdownUrl": "https://benchlm.ai/md/models/composer-2.md",
      "id": 89,
      "releaseDate": "2026-03-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "composer",
        "familyName": "Composer",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "composer-2",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 73,
        "overallScore": 73,
        "rawOverallScore": 73,
        "verifiedDisplayScore": 60,
        "displayCategoryScores": {
          "agentic": 65.3,
          "coding": 80.4,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 61.7,
          "coding": 58,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 5,
        "verifiedBenchmarkCount": 5,
        "rankableBenchmarkCount": 5,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 61.7
        },
        "coding": {
          "sweMultilingual": 73.7,
          "sweRebench": 58,
          "reactNativeEvals": 96.1,
          "terminalBench2": 61.7
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "mimo-v2-5",
      "canonicalModelKey": "mimo-v2-5",
      "model": "MiMo-V2.5",
      "creator": "Xiaomi",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 71,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mimo-v2-5",
      "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-5.md",
      "id": 70,
      "releaseDate": "2026-04-22",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mimo-v2-5",
        "familyName": "MiMo-V2.5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mimo-v2-5",
        "relatedModelKeys": [
          "mimo-v2-5-pro",
          "mimo-v2-omni"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "mimo-v2-omni"
      },
      "scores": {
        "displayScore": 71,
        "overallScore": 73,
        "rawOverallScore": 73,
        "verifiedDisplayScore": 65,
        "displayCategoryScores": {
          "agentic": 72.7,
          "coding": 70.3,
          "reasoning": null,
          "multimodalGrounded": 70,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 65.8,
          "coding": 56.1,
          "reasoning": null,
          "multimodalGrounded": 78.9,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "multimodalGrounded": 30
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 8,
        "verifiedBenchmarkCount": 8,
        "rankableBenchmarkCount": 9,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 62.3,
          "mmClawBench": 23.8,
          "terminalBench2": 65.8,
          "gertLabs": 46.89
        },
        "coding": {
          "swePro": 56.1,
          "terminalBench2": 65.8
        },
        "reasoning": {},
        "multimodalGrounded": {
          "videoMmeWithSub": 87.7,
          "charxiv": 81,
          "mmmuPro": 77.9,
          "designArenaWebsite": 1306
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "step-3-7-flash",
      "canonicalModelKey": "step-3-7-flash",
      "model": "Step 3.7 Flash",
      "creator": "StepFun",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 70,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/step-3-7-flash",
      "markdownUrl": "https://benchlm.ai/md/models/step-3-7-flash.md",
      "id": 239,
      "releaseDate": "2026-05-29",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "step-3-7-flash",
        "familyName": "Step 3.7 Flash",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "step-3-7-flash",
        "relatedModelKeys": [
          "step-3-5-flash"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "step-3-5-flash"
      },
      "scores": {
        "displayScore": 70,
        "overallScore": 71,
        "rawOverallScore": 71,
        "verifiedDisplayScore": 61,
        "displayCategoryScores": {
          "agentic": 70.6,
          "coding": 72.5,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 65.9,
          "coding": 56.3,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 10,
        "verifiedBenchmarkCount": 10,
        "rankableBenchmarkCount": 11,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 59.5,
          "browseComp": 75.82,
          "deepSearchQa": 92.82,
          "gdpvalAaNormalized": 40,
          "toolathlon": 49.5,
          "clawEval": 67.1,
          "hleWithTools": 47.2,
          "gertLabs": 51.57,
          "aaAgenticIndex": 59.53,
          "tau2Bench": 98.5,
          "gdpvalAa": 1300
        },
        "coding": {
          "swePro": 56.3,
          "terminalBench2": 59.5,
          "aaCodingIndex": 37.09,
          "terminalBenchHard": 35.6,
          "aaSciCode": 40
        },
        "reasoning": {
          "lcr": 63.7,
          "critpt": 2.3
        },
        "multimodalGrounded": {
          "simpleVqa": 79.2,
          "vStar": 95.3,
          "aaMmmuPro": 75.3,
          "designArenaWebsite": 1227
        },
        "knowledge": {
          "artificialAnalysis": 42.59,
          "aaGpqaDiamond": 80.9,
          "aaHle": 19.9,
          "aaOmniscienceIndex": -37.5,
          "omniscienceAccuracy": 25.4,
          "omniscienceHallucinationRate": 84.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 67.3
        },
        "math": {}
      }
    },
    {
      "slug": "grok-4-20-multi-agent-beta",
      "canonicalModelKey": "grok-4-20-multi-agent-beta",
      "model": "Grok 4.20 Multi-agent",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 70,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/grok-4-20-multi-agent-beta",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-20-multi-agent-beta.md",
      "id": 8,
      "releaseDate": "2026-03-10",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-4-20",
        "familyName": "Grok 4.20",
        "variantType": "multi-agent",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-4-20-beta",
        "relatedModelKeys": [
          "grok-4-20-beta"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 70,
        "overallScore": 70,
        "rawOverallScore": 70,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 62.8,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 100,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 2
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-4-mini",
      "canonicalModelKey": "gpt-5-4-mini",
      "model": "GPT-5.4 mini",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 68,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-4-mini",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4-mini.md",
      "id": 81,
      "releaseDate": "2026-03-17",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-4",
        "familyName": "GPT-5.4",
        "variantType": "mini",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-4",
        "relatedModelKeys": [
          "gpt-5-4",
          "gpt-5-4-pro",
          "gpt-5-4-nano"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "gpt-5-mini"
      },
      "scores": {
        "displayScore": 68,
        "overallScore": 62,
        "rawOverallScore": 62,
        "verifiedDisplayScore": 62,
        "displayCategoryScores": {
          "agentic": 72.6,
          "coding": 75.7,
          "reasoning": 15.2,
          "multimodalGrounded": 71.2,
          "knowledge": 80,
          "multilingual": null,
          "instructionFollowing": 72.2,
          "math": 92.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 65.6,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 76.6,
          "knowledge": 41.5,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 51
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 9,
        "verifiedBenchmarkCount": 9,
        "rankableBenchmarkCount": 19,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 60,
          "osWorldVerified": 72.1,
          "mcpAtlas": 57.7,
          "toolathlon": 42.9,
          "tau2Bench": 83.3,
          "aaAgenticIndex": 58.88,
          "apexAgentsAa": 28.2,
          "gdpvalAaNormalized": 46.9,
          "gdpvalAa": 1438
        },
        "coding": {
          "vibeCodeBench": 47.969,
          "aaCodingIndex": 51.48,
          "terminalBenchHard": 52.3,
          "aaSciCode": 49.9
        },
        "reasoning": {
          "lcr": 69.3,
          "critpt": 10
        },
        "multimodalGrounded": {
          "mmmuPro": 76.6,
          "mmmuProPython": 78,
          "aaMmmuPro": 73.3
        },
        "knowledge": {
          "gpqa": 88,
          "hle": 41.5,
          "hleNoTools": 28.2,
          "artificialAnalysis": 48.9,
          "aaGpqaDiamond": 87.5,
          "aaHle": 26.6,
          "aaOmniscienceIndex": -18.7,
          "omniscienceAccuracy": 37.5,
          "omniscienceHallucinationRate": 89.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 73.3
        },
        "math": {}
      }
    },
    {
      "slug": "gemma-4-31b",
      "canonicalModelKey": "gemma-4-31b",
      "model": "Gemma 4 31B",
      "creator": "Google",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 64,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gemma-4-31b",
      "markdownUrl": "https://benchlm.ai/md/models/gemma-4-31b.md",
      "id": 65,
      "releaseDate": "2026-04-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemma-4",
        "familyName": "Gemma 4",
        "variantType": "31b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemma-4-31b",
        "relatedModelKeys": [
          "gemma-4-26b-a4b",
          "gemma-4-e4b",
          "gemma-4-e2b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 64,
        "overallScore": 58,
        "rawOverallScore": 58,
        "verifiedDisplayScore": 55,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 54,
          "reasoning": 54.7,
          "multimodalGrounded": 71.8,
          "knowledge": 72.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 41.6,
          "reasoning": null,
          "multimodalGrounded": 76.9,
          "knowledge": 55.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "knowledge": 34
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 6,
        "verifiedBenchmarkCount": 6,
        "rankableBenchmarkCount": 11,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 40.94,
          "tau2Bench": 59.9,
          "gdpvalAaNormalized": 30.7,
          "gdpvalAa": 1113,
          "gertLabs": 35.26
        },
        "coding": {
          "sweRebench": 41.6,
          "reactNativeEvals": 75.2,
          "aaCodingIndex": 38.71,
          "terminalBenchHard": 36.4,
          "aaSciCode": 43.4
        },
        "reasoning": {
          "lcr": 62,
          "critpt": 1.4
        },
        "multimodalGrounded": {
          "mmmuPro": 76.9,
          "aaMmmuPro": 73.4
        },
        "knowledge": {
          "gpqa": 84.3,
          "mmluPro": 85.2,
          "hle": 26.5,
          "hleNoTools": 19.5,
          "artificialAnalysis": 39.18,
          "aaGpqaDiamond": 85.7,
          "aaHle": 22.7,
          "aaOmniscienceIndex": -45.4,
          "omniscienceAccuracy": 19.9,
          "omniscienceHallucinationRate": 81.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 75.6
        },
        "math": {}
      }
    },
    {
      "slug": "exaone-4-0-32b",
      "canonicalModelKey": "exaone-4-0-32b",
      "model": "Exaone 4.0 32B",
      "creator": "LG AI Research",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 63,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/exaone-4-0-32b",
      "markdownUrl": "https://benchlm.ai/md/models/exaone-4-0-32b.md",
      "id": 2,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "exaone-4",
        "familyName": "Exaone 4.0",
        "variantType": "32b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "exaone-4-0-32b",
        "relatedModelKeys": [
          "exaone-4-0-1-2b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 63,
        "overallScore": 81,
        "rawOverallScore": 81,
        "verifiedDisplayScore": 83,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 81.1,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 79.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 81.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 85.3
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 2,
        "verifiedBenchmarkCount": 2,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 1.36,
          "tau2Bench": 4.1,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 328
        },
        "coding": {
          "aaCodingIndex": 9.42,
          "terminalBenchHard": 1.5,
          "aaSciCode": 25.2
        },
        "reasoning": {
          "lcr": 8,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "mmluPro": 81.8,
          "artificialAnalysis": 11.66,
          "aaGpqaDiamond": 62.8,
          "aaHle": 4.9,
          "aaOmniscienceIndex": -62.3,
          "omniscienceAccuracy": 10.4,
          "omniscienceHallucinationRate": 81
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 33.5
        },
        "math": {
          "aime2025": 85.3
        },
        "korean": {}
      }
    },
    {
      "slug": "glm-5v-turbo",
      "canonicalModelKey": "glm-5v-turbo",
      "model": "GLM-5V-Turbo",
      "creator": "Z.AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 62,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/glm-5v-turbo",
      "markdownUrl": "https://benchlm.ai/md/models/glm-5v-turbo.md",
      "id": 102,
      "releaseDate": "2026-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-5",
        "familyName": "GLM-5",
        "variantType": "vision-turbo",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-5",
        "relatedModelKeys": [
          "glm-5",
          "glm-5-reasoning",
          "glm-5-1",
          "glm-5-turbo"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 62,
        "overallScore": 49,
        "rawOverallScore": 49,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 60.4,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 16,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 53.8,
          "aaAgenticIndex": 61.07,
          "tau2Bench": 98.5,
          "gdpvalAaNormalized": 41.4,
          "gdpvalAa": 1328,
          "gertLabs": 30.76
        },
        "coding": {
          "aaCodingIndex": 36.22,
          "terminalBenchHard": 32.6,
          "aaSciCode": 43.5
        },
        "reasoning": {
          "lcr": 61,
          "critpt": 0.6
        },
        "multimodalGrounded": {
          "aaMmmuPro": 72.8
        },
        "knowledge": {
          "artificialAnalysis": 42.85,
          "aaGpqaDiamond": 80.9,
          "aaHle": 15.8,
          "aaOmniscienceIndex": -19,
          "omniscienceAccuracy": 29.1,
          "omniscienceHallucinationRate": 67.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 61.1
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-5-4-nano",
      "canonicalModelKey": "gpt-5-4-nano",
      "model": "GPT-5.4 nano",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 59,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-4-nano",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4-nano.md",
      "id": 127,
      "releaseDate": "2026-03-17",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-4",
        "familyName": "GPT-5.4",
        "variantType": "nano",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-4",
        "relatedModelKeys": [
          "gpt-5-4",
          "gpt-5-4-pro",
          "gpt-5-4-mini"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "gpt-5-nano"
      },
      "scores": {
        "displayScore": 59,
        "overallScore": 42,
        "rawOverallScore": 42,
        "verifiedDisplayScore": 48,
        "displayCategoryScores": {
          "agentic": 47.3,
          "coding": 72.9,
          "reasoning": 14.7,
          "multimodalGrounded": 52.8,
          "knowledge": 74.1,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 42.9,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 66.1,
          "knowledge": 37.7,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 9,
        "verifiedBenchmarkCount": 9,
        "rankableBenchmarkCount": 17,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 46.3,
          "osWorldVerified": 39,
          "mcpAtlas": 56.1,
          "toolathlon": 35.5,
          "tau2Bench": 76,
          "aaAgenticIndex": 47.6,
          "apexAgentsAa": 24.9,
          "gdpvalAaNormalized": 34.8,
          "gdpvalAa": 1195
        },
        "coding": {
          "vibeCodeBench": 26.097,
          "aaCodingIndex": 43.91,
          "terminalBenchHard": 42.4,
          "aaSciCode": 46.9
        },
        "reasoning": {
          "lcr": 66,
          "critpt": 9.3
        },
        "multimodalGrounded": {
          "mmmuPro": 66.1,
          "mmmuProPython": 69.5,
          "aaMmmuPro": 65.4
        },
        "knowledge": {
          "gpqa": 82.8,
          "hle": 37.7,
          "hleNoTools": 24.3,
          "artificialAnalysis": 43.98,
          "aaGpqaDiamond": 81.7,
          "aaHle": 26.5,
          "aaOmniscienceIndex": -29.5,
          "omniscienceAccuracy": 25.4,
          "omniscienceHallucinationRate": 73.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 75.9
        },
        "math": {}
      }
    },
    {
      "slug": "mellum2-12b-a2-5b-thinking",
      "canonicalModelKey": "mellum2-12b-a2-5b-thinking",
      "model": "Mellum2-12B-A2.5B-Thinking",
      "creator": "JetBrains",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 59,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mellum2-12b-a2-5b-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/mellum2-12b-a2-5b-thinking.md",
      "id": 252,
      "releaseDate": "2026-05-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mellum2-12b-a2-5b",
        "familyName": "Mellum2 12B-A2.5B",
        "variantType": "thinking",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mellum2-12b-a2-5b-instruct",
        "relatedModelKeys": [
          "mellum2-12b-a2-5b-instruct"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 59,
        "overallScore": 59,
        "rawOverallScore": 59,
        "verifiedDisplayScore": 67,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 73.9,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 42.7,
          "multilingual": null,
          "instructionFollowing": 34.8,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 69.9,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 57.6,
          "multilingual": null,
          "instructionFollowing": 76.5,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 106
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 6,
        "verifiedBenchmarkCount": 6,
        "rankableBenchmarkCount": 6,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "bfclV4": 45.6
        },
        "coding": {
          "liveCodeBench": 69.9
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "mmluRedux": 86.2,
          "gpqa": 57.6,
          "gpqaDiamond": 57.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 76.5
        },
        "math": {}
      }
    },
    {
      "slug": "hy3-preview",
      "canonicalModelKey": "hy3-preview",
      "model": "Hy3 Preview",
      "creator": "Tencent",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 58,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/hy3-preview",
      "markdownUrl": "https://benchlm.ai/md/models/hy3-preview.md",
      "id": 111,
      "releaseDate": "2026-04-23",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "hy3",
        "familyName": "Hy3",
        "variantType": "preview",
        "snapshotLabel": "preview",
        "baseFamilyModelKey": "hy3-preview",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 58,
        "overallScore": 57,
        "rawOverallScore": 57,
        "verifiedDisplayScore": 55,
        "displayCategoryScores": {
          "agentic": 57.1,
          "coding": 62.6,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 60.1,
          "multilingual": null,
          "instructionFollowing": 33.8,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 54.4,
          "coding": 60,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 46.7,
          "multilingual": null,
          "instructionFollowing": 63.1,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 107
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 8,
        "verifiedBenchmarkCount": 8,
        "rankableBenchmarkCount": 8,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 54.4,
          "tau2Bench": 92.7,
          "gdpvalAaNormalized": 36.8,
          "aaAgenticIndex": 55.67,
          "gdpvalAa": 1236,
          "gertLabs": 36.91
        },
        "coding": {
          "sweVerified": 74.4,
          "terminalBench2": 54.4,
          "terminalBenchHard": 34.1,
          "sciCode": 41.2,
          "aaCodingIndex": 36.46,
          "aaSciCode": 41.2
        },
        "reasoning": {
          "lcr": 54.7,
          "critpt": 4.6
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 41.85,
          "gpqa": 87.2,
          "gpqaDiamond": 87.2,
          "hle": 25.5,
          "omniscienceAccuracy": 28,
          "omniscienceHallucinationRate": 86.9,
          "aaGpqaDiamond": 86.7,
          "aaHle": 25.5,
          "aaOmniscienceIndex": -34.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifBench": 63.1,
          "aaIfBench": 63.1
        },
        "math": {}
      }
    },
    {
      "slug": "zaya1-8b",
      "canonicalModelKey": "zaya1-8b",
      "model": "ZAYA1-8B",
      "creator": "Zyphra",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "131K",
      "contextWindowTokens": 131000,
      "displayScore": 57,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/zaya1-8b",
      "markdownUrl": "https://benchlm.ai/md/models/zaya1-8b.md",
      "id": 30,
      "releaseDate": "2026-05-05",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "zaya1",
        "familyName": "ZAYA1",
        "variantType": "8b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "zaya1-8b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 57,
        "overallScore": 57,
        "rawOverallScore": 57,
        "verifiedDisplayScore": 73,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 62.7,
          "multilingual": null,
          "instructionFollowing": 44.2,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 73.1,
          "multilingual": null,
          "instructionFollowing": 74,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 89
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 11,
        "verifiedBenchmarkCount": 11,
        "rankableBenchmarkCount": 11,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "bfclV4": 39.22
        },
        "coding": {
          "liveCodeBenchV6": 65.8
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "gpqa": 71,
          "gpqaDiamond": 71,
          "mmluPro": 74.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 85.58,
          "ifBench": 52.56
        },
        "math": {
          "aime2026": 89.1,
          "hmmtFeb2026": 71.6,
          "imoAnswerBench": 59.3,
          "apex": 32.2
        }
      }
    },
    {
      "slug": "gemma-4-26b-a4b",
      "canonicalModelKey": "gemma-4-26b-a4b",
      "model": "Gemma 4 26B A4B",
      "creator": "Google",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 54,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gemma-4-26b-a4b",
      "markdownUrl": "https://benchlm.ai/md/models/gemma-4-26b-a4b.md",
      "id": 74,
      "releaseDate": "2026-04-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemma-4",
        "familyName": "Gemma 4",
        "variantType": "26b-a4b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemma-4-31b",
        "relatedModelKeys": [
          "gemma-4-31b",
          "gemma-4-e4b",
          "gemma-4-e2b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 54,
        "overallScore": 56,
        "rawOverallScore": 56,
        "verifiedDisplayScore": 61,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 65.6,
          "reasoning": 18.8,
          "multimodalGrounded": 66.3,
          "knowledge": 62.3,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 73.8,
          "knowledge": 49.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "knowledge": 49
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 8,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 32.15,
          "tau2Bench": 43.6,
          "gdpvalAaNormalized": 25.7,
          "gdpvalAa": 1014
        },
        "coding": {
          "aaCodingIndex": 22.44,
          "terminalBenchHard": 13.6,
          "aaSciCode": 40
        },
        "reasoning": {
          "lcr": 55.7,
          "critpt": 0
        },
        "multimodalGrounded": {
          "mmmuPro": 73.8,
          "aaMmmuPro": 69.2
        },
        "knowledge": {
          "mmluPro": 82.6,
          "hle": 17.2,
          "hleNoTools": 8.7,
          "artificialAnalysis": 31.21,
          "aaGpqaDiamond": 79.2,
          "aaHle": 18.3,
          "aaOmniscienceIndex": -48.1,
          "omniscienceAccuracy": 18.2,
          "omniscienceHallucinationRate": 80.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 72.4
        },
        "math": {}
      }
    },
    {
      "slug": "zaya1-74b-preview",
      "canonicalModelKey": "zaya1-74b-preview",
      "model": "ZAYA1-74B-Preview",
      "creator": "Zyphra",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 54,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/zaya1-74b-preview",
      "markdownUrl": "https://benchlm.ai/md/models/zaya1-74b-preview.md",
      "id": 105,
      "releaseDate": "2026-05-07",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "zaya1",
        "familyName": "ZAYA1",
        "variantType": "74b-preview",
        "snapshotLabel": "preview",
        "baseFamilyModelKey": "zaya1-74b-preview",
        "relatedModelKeys": [
          "zaya1-8b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 54,
        "overallScore": 54,
        "rawOverallScore": 54,
        "verifiedDisplayScore": 57,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 59.2,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 45.6,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 53.2,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 64.3,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 7,
        "verifiedBenchmarkCount": 7,
        "rankableBenchmarkCount": 7,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau2Airline": 56.1
        },
        "coding": {
          "liveCodeBenchV6": 65.7,
          "sweVerified": 53.2
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "mmluPro": 68.1,
          "gpqa": 57.3,
          "gpqaDiamond": 57.3
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {
          "aime2026": 76.4
        }
      }
    },
    {
      "slug": "mistral-small-4-reasoning",
      "canonicalModelKey": "mistral-small-4-reasoning",
      "model": "Mistral Small 4 (Reasoning)",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 54,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mistral-small-4-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-small-4-reasoning.md",
      "id": 49,
      "releaseDate": "2026-02-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-small-4",
        "familyName": "Mistral Small 4",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mistral-small-4",
        "relatedModelKeys": [
          "mistral-small-4"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 54,
        "overallScore": 62,
        "rawOverallScore": 62,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 57.9,
          "reasoning": null,
          "multimodalGrounded": 42.1,
          "knowledge": 68.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 77.5
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 5,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 25.87,
          "tau2Bench": 41.2,
          "gdpvalAaNormalized": 18,
          "gdpvalAa": 859
        },
        "coding": {
          "aaCodingIndex": 24.27,
          "terminalBenchHard": 17.4,
          "aaSciCode": 38
        },
        "reasoning": {
          "lcr": 44.7,
          "critpt": 0.3
        },
        "multimodalGrounded": {
          "aaMmmuPro": 56.8
        },
        "knowledge": {
          "artificialAnalysis": 27.8,
          "aaGpqaDiamond": 76.9,
          "aaHle": 9.5,
          "aaOmniscienceIndex": -29.9,
          "omniscienceAccuracy": 22.1,
          "omniscienceHallucinationRate": 66.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 48.2
        },
        "math": {}
      }
    },
    {
      "slug": "laguna-m-1",
      "canonicalModelKey": "laguna-m-1",
      "model": "Laguna M.1",
      "creator": "Poolside",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "131K",
      "contextWindowTokens": 131000,
      "displayScore": 51,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/laguna-m-1",
      "markdownUrl": "https://benchlm.ai/md/models/laguna-m-1.md",
      "id": 118,
      "releaseDate": "2026-04-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "laguna",
        "familyName": "Laguna",
        "variantType": "m-1",
        "snapshotLabel": null,
        "baseFamilyModelKey": "laguna-m-1",
        "relatedModelKeys": [
          "laguna-xs-2"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 51,
        "overallScore": 51,
        "rawOverallScore": 51,
        "verifiedDisplayScore": 52,
        "displayCategoryScores": {
          "agentic": 31.5,
          "coding": 72.8,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 45.8,
          "coding": 58.6,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 5,
        "verifiedBenchmarkCount": 5,
        "rankableBenchmarkCount": 5,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 45.8
        },
        "coding": {
          "sweVerified": 74.6,
          "sweMultilingual": 63.1,
          "swePro": 49.2,
          "terminalBench2": 45.8
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "k-exaone",
      "canonicalModelKey": "k-exaone",
      "model": "K-Exaone",
      "creator": "LG AI Research",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 50,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/k-exaone",
      "markdownUrl": "https://benchlm.ai/md/models/k-exaone.md",
      "id": 131,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "exaone-frontier",
        "familyName": "K-Exaone",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "k-exaone",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 50,
        "overallScore": 54,
        "rawOverallScore": 54,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 50.4,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 38.14,
          "tau2Bench": 74.3,
          "gdpvalAaNormalized": 16.2,
          "gdpvalAa": 824
        },
        "coding": {
          "aaCodingIndex": 27.03,
          "terminalBenchHard": 22.7,
          "aaSciCode": 35.6
        },
        "reasoning": {
          "lcr": 55.7,
          "critpt": 1.1
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 32.12,
          "aaGpqaDiamond": 78.3,
          "aaHle": 13.1,
          "aaOmniscienceIndex": -57.9,
          "omniscienceAccuracy": 16.5,
          "omniscienceHallucinationRate": 89.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 64.7
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "nemotron-3-nano-omni-30b-a3b",
      "canonicalModelKey": "nemotron-3-nano-omni-30b-a3b",
      "model": "Nemotron 3 Nano Omni 30B A3B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 48,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/nemotron-3-nano-omni-30b-a3b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-omni-30b-a3b.md",
      "id": 52,
      "releaseDate": "2026-04-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "nemotron-3-nano-omni",
        "familyName": "Nemotron 3 Nano Omni",
        "variantType": "30b-a3b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "nemotron-3-nano-omni-30b-a3b",
        "relatedModelKeys": [
          "nemotron-3-nano-30b",
          "nemotron-3-super-120b-a12b",
          "nemotron-3-super-100b",
          "nemotron-3-ultra-500b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 48,
        "overallScore": 57,
        "rawOverallScore": 57,
        "verifiedDisplayScore": 68,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 40.9,
          "reasoning": null,
          "multimodalGrounded": 47.5,
          "knowledge": 68.3,
          "multilingual": null,
          "instructionFollowing": 71.3,
          "math": 74.8
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 53.5,
          "reasoning": null,
          "multimodalGrounded": 76.3,
          "knowledge": 75.5,
          "multilingual": null,
          "instructionFollowing": 74.2,
          "math": 82.1
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 52
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 15,
        "verifiedBenchmarkCount": 15,
        "rankableBenchmarkCount": 15,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 3
      },
      "benchmarks": {
        "agentic": {
          "osWorld": 47.4,
          "tau2Bench": 45.3,
          "aaAgenticIndex": 23.87,
          "gdpvalAaNormalized": 13.1,
          "gdpvalAa": 762
        },
        "coding": {
          "liveCodeBench": 63.2,
          "sciCode": 32,
          "aaCodingIndex": 14.81,
          "terminalBenchHard": 8.3,
          "aaSciCode": 27.8
        },
        "reasoning": {
          "lcr": 35.7,
          "critpt": 0
        },
        "multimodalGrounded": {
          "mmmu": 70.8,
          "mmLongBenchDoc": 57.5,
          "charxiv": 76.25,
          "screenSpotPro": 57.8,
          "videoMmeNoSub": 72.2,
          "ai2dTest": 88.5,
          "refcocoAvg": 90.5,
          "aaMmmuPro": 53.2
        },
        "knowledge": {
          "mmluPro": 77.3,
          "gpqa": 72.2,
          "gpqaDiamond": 72.2,
          "artificialAnalysis": 21.43,
          "aaGpqaDiamond": 46.9,
          "aaHle": 5.3,
          "aaOmniscienceIndex": -56,
          "omniscienceAccuracy": 14.8,
          "omniscienceHallucinationRate": 83.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifBench": 74.2,
          "aaIfBench": 63.2
        },
        "math": {
          "aime2025": 82.1
        }
      }
    },
    {
      "slug": "gemma-4-12b",
      "canonicalModelKey": "gemma-4-12b",
      "model": "Gemma 4 12B",
      "creator": "Google",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 48,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gemma-4-12b",
      "markdownUrl": "https://benchlm.ai/md/models/gemma-4-12b.md",
      "id": 251,
      "releaseDate": "2026-06-03",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemma-4",
        "familyName": "Gemma 4",
        "variantType": "12b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemma-4-31b",
        "relatedModelKeys": [
          "gemma-4-31b",
          "gemma-4-26b-a4b",
          "gemma-4-e4b",
          "gemma-4-e2b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 48,
        "overallScore": 53,
        "rawOverallScore": 53,
        "verifiedDisplayScore": 65,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 65.9,
          "reasoning": 10.1,
          "multimodalGrounded": 58.1,
          "knowledge": 71.7,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 72,
          "reasoning": 43.4,
          "multimodalGrounded": 69.1,
          "knowledge": 77.8,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 12,
        "verifiedBenchmarkCount": 12,
        "rankableBenchmarkCount": 12,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 24.63,
          "tau2Bench": 36.3,
          "gdpvalAaNormalized": 18.8,
          "gdpvalAa": 875
        },
        "coding": {
          "liveCodeBench": 72,
          "aaCodingIndex": 24.85,
          "terminalBenchHard": 18.2,
          "aaSciCode": 38.2
        },
        "reasoning": {
          "bbh": 53,
          "mrcrv2": 43.4,
          "lcr": 55.3,
          "critpt": 0
        },
        "multimodalGrounded": {
          "mmmuPro": 69.1,
          "mathVision": 79.7,
          "medXpertQaMm": 48.7,
          "aaMmmuPro": 69.7
        },
        "knowledge": {
          "gpqa": 78.8,
          "gpqaDiamond": 78.8,
          "mmluPro": 77.2,
          "hleNoTools": 5.2,
          "mmmlu": 83.4,
          "artificialAnalysis": 29.17,
          "aaGpqaDiamond": 75.3,
          "aaHle": 14.8,
          "aaOmniscienceIndex": -51.9,
          "omniscienceAccuracy": 16,
          "omniscienceHallucinationRate": 80.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 73.5
        },
        "math": {
          "aime2026": 77.5
        }
      }
    },
    {
      "slug": "ternary-bonsai-8b",
      "canonicalModelKey": "ternary-bonsai-8b",
      "model": "Ternary Bonsai 8B",
      "creator": "Prism ML",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 44,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ternary-bonsai-8b",
      "markdownUrl": "https://benchlm.ai/md/models/ternary-bonsai-8b.md",
      "id": 80,
      "releaseDate": "2026-04-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ternary-bonsai",
        "familyName": "Ternary Bonsai",
        "variantType": "8b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ternary-bonsai-8b",
        "relatedModelKeys": [
          "ternary-bonsai-4b",
          "ternary-bonsai-1-7b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 44,
        "overallScore": 44,
        "rawOverallScore": 44,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 41.6,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 54.2,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 81
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "lfm2-5-8b-a1b",
      "canonicalModelKey": "lfm2-5-8b-a1b",
      "model": "LFM2.5-8B-A1B",
      "creator": "LiquidAI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 42,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-5-8b-a1b",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-8b-a1b.md",
      "id": 240,
      "releaseDate": "2026-05-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "lfm2-5-8b-a1b",
        "familyName": "LFM2.5-8B-A1B",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-5-8b-a1b",
        "relatedModelKeys": [
          "lfm2-5-1-2b-thinking",
          "lfm2-5-350m"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 42,
        "overallScore": 50,
        "rawOverallScore": 50,
        "verifiedDisplayScore": 70,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 63.7,
          "math": 36.5
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 79.5,
          "math": 59.9
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 62,
          "math": 65
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 6,
        "verifiedBenchmarkCount": 6,
        "rankableBenchmarkCount": 6,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "bfclV4": 49.73,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 255,
          "aaAgenticIndex": 5.36,
          "tau2Bench": 16.1
        },
        "coding": {
          "aaCodingIndex": 5.62,
          "terminalBenchHard": 4.5,
          "aaSciCode": 7.8
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "aaGpqaDiamond": 51.3,
          "aaHle": 6.9,
          "aaOmniscienceIndex": -33.3,
          "omniscienceAccuracy": 9.4,
          "omniscienceHallucinationRate": 47,
          "artificialAnalysis": 14.19
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 91.84,
          "ifBench": 56.47,
          "aaIfBench": 55.6
        },
        "math": {
          "math500": 88.76,
          "aime2025": 42.53,
          "aime2026": 50
        }
      }
    },
    {
      "slug": "mistral-medium-3",
      "canonicalModelKey": "mistral-medium-3",
      "model": "Mistral Medium 3",
      "creator": "Mistral",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 42,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mistral-medium-3",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-medium-3.md",
      "id": 106,
      "releaseDate": "2026-02-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-medium-3",
        "familyName": "Mistral Medium 3",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mistral-medium-3",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 42,
        "overallScore": 47,
        "rawOverallScore": 47,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 20.9,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 59.9,
          "multilingual": null,
          "instructionFollowing": 82.1,
          "math": 82.9
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 32
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 7,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 13.74,
          "tau2Bench": 24.3,
          "gdpvalAaNormalized": 4.2,
          "gdpvalAa": 585
        },
        "coding": {
          "aaCodingIndex": 13.56,
          "terminalBenchHard": 3.8,
          "aaSciCode": 33.1
        },
        "reasoning": {
          "lcr": 28,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 53,
          "designArenaWebsite": 1124
        },
        "knowledge": {
          "artificialAnalysis": 18.76,
          "aaGpqaDiamond": 57.8,
          "aaHle": 4.3,
          "aaOmniscienceIndex": -31.5,
          "omniscienceAccuracy": 18.3,
          "omniscienceHallucinationRate": 60.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 39.3
        },
        "math": {}
      }
    },
    {
      "slug": "deepseek-v4-pro-base",
      "canonicalModelKey": "deepseek-v4-pro-base",
      "model": "DeepSeek V4 Pro Base",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 41,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/deepseek-v4-pro-base",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-base.md",
      "id": 82,
      "releaseDate": "2026-04-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v4",
        "familyName": "DeepSeek V4",
        "variantType": "base",
        "snapshotLabel": "pro-base",
        "baseFamilyModelKey": "deepseek-v4-pro-max",
        "relatedModelKeys": [
          "deepseek-v4-flash-base",
          "deepseek-v4-flash",
          "deepseek-v4-flash-high",
          "deepseek-v4-flash-max",
          "deepseek-v4-pro",
          "deepseek-v4-pro-high",
          "deepseek-v4-pro-max"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 41,
        "overallScore": 41,
        "rawOverallScore": 41,
        "verifiedDisplayScore": 62,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 24,
          "multimodalGrounded": null,
          "knowledge": 49.2,
          "multilingual": 67.8,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 51.5,
          "multimodalGrounded": null,
          "knowledge": 63.4,
          "multilingual": 84.4,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "knowledge": 62,
          "multilingual": 37
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 24,
        "verifiedBenchmarkCount": 24,
        "rankableBenchmarkCount": 24,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "bigCodeBench": 59.2,
          "humaneval": 76.8
        },
        "reasoning": {
          "bbh": 87.5,
          "drop": 88.7,
          "hellaswag": 88,
          "winogrande": 81.5,
          "cluewsc": 85.2,
          "longBenchV2": 51.5
        },
        "multimodalGrounded": {},
        "knowledge": {
          "agieval": 83.1,
          "mmlu": 90.1,
          "mmluRedux": 90.8,
          "mmluPro": 73.5,
          "mmmlu": 90.3,
          "cEval": 93.1,
          "cmmlu": 90.8,
          "multiLoKo": 51.1,
          "simpleQa": 55.2,
          "superGpqa": 53.9,
          "factsParametric": 62.6,
          "triviaQa": 85.6
        },
        "multilingual": {
          "mgsm": 84.4
        },
        "instructionFollowing": {},
        "math": {
          "gsm8k": 92.6,
          "mathBenchmark": 64.5,
          "cmath": 90.9
        }
      }
    },
    {
      "slug": "mistral-small-4",
      "canonicalModelKey": "mistral-small-4",
      "model": "Mistral Small 4",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 41,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mistral-small-4",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-small-4.md",
      "id": 66,
      "releaseDate": "2026-02-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mistral-small-4",
        "familyName": "Mistral Small 4",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mistral-small-4",
        "relatedModelKeys": [
          "mistral-small-4-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 41,
        "overallScore": 42,
        "rawOverallScore": 42,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 36.9,
          "multilingual": null,
          "instructionFollowing": 48.2,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 84
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 6,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 25.87,
          "tau2Bench": 41.2,
          "gdpvalAaNormalized": 18,
          "gdpvalAa": 859
        },
        "coding": {
          "aaCodingIndex": 24.27,
          "terminalBenchHard": 17.4,
          "aaSciCode": 38
        },
        "reasoning": {
          "lcr": 44.7,
          "critpt": 0.3
        },
        "multimodalGrounded": {
          "aaMmmuPro": 56.8
        },
        "knowledge": {
          "artificialAnalysis": 27.8,
          "aaGpqaDiamond": 76.9,
          "aaHle": 9.5,
          "aaOmniscienceIndex": -29.9,
          "omniscienceAccuracy": 22.1,
          "omniscienceHallucinationRate": 66.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 48.2
        },
        "math": {}
      }
    },
    {
      "slug": "grok-3-mini",
      "canonicalModelKey": "grok-3-mini",
      "model": "Grok 3 Mini",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 41,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/grok-3-mini",
      "markdownUrl": "https://benchlm.ai/md/models/grok-3-mini.md",
      "id": 134,
      "releaseDate": "2025-02-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-3-mini",
        "familyName": "Grok 3 Mini",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-3-mini",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 41,
        "overallScore": 41,
        "rawOverallScore": 41,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 36.4,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 48.9,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "knowledge": 63
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 5,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "sarvam-30b",
      "canonicalModelKey": "sarvam-30b",
      "model": "Sarvam 30B",
      "creator": "Sarvam",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 40,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/sarvam-30b",
      "markdownUrl": "https://benchlm.ai/md/models/sarvam-30b.md",
      "id": 136,
      "releaseDate": "2026-03-06",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "sarvam-30b",
        "familyName": "Sarvam 30B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "sarvam-30b",
        "relatedModelKeys": [
          "sarvam-105b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "sarvam-m"
      },
      "scores": {
        "displayScore": 40,
        "overallScore": 53,
        "rawOverallScore": 53,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 24.7,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 76.6,
          "multilingual": null,
          "instructionFollowing": null,
          "math": 81.2
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "math": 21
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": true
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 11,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 11.5,
          "tau2Bench": 34.5,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 359
        },
        "coding": {
          "aaCodingIndex": 7.92,
          "terminalBenchHard": 2.3,
          "aaSciCode": 19.2
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0.3
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 12.34,
          "aaGpqaDiamond": 63.3,
          "aaHle": 7,
          "aaOmniscienceIndex": -72,
          "omniscienceAccuracy": 12.7,
          "omniscienceHallucinationRate": 97
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 26.5
        },
        "math": {}
      }
    },
    {
      "slug": "command-a-plus",
      "canonicalModelKey": "command-a-plus",
      "model": "Command A+",
      "creator": "Cohere",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 39,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/command-a-plus",
      "markdownUrl": "https://benchlm.ai/md/models/command-a-plus.md",
      "id": 86,
      "releaseDate": "2026-05-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "command-a",
        "familyName": "Command A",
        "variantType": "plus",
        "snapshotLabel": null,
        "baseFamilyModelKey": "command-a-plus",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 39,
        "overallScore": 33,
        "rawOverallScore": 33,
        "verifiedDisplayScore": 60,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 32.8,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 59.8,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "multimodalGrounded": 82
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": true,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 3,
        "verifiedBenchmarkCount": 3,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau2Bench": 80.7,
          "aaAgenticIndex": 40.9,
          "gdpvalAaNormalized": 20.9,
          "gdpvalAa": 919
        },
        "coding": {
          "terminalBenchHard": 25,
          "aaCodingIndex": 29.28,
          "aaSciCode": 37.8
        },
        "reasoning": {
          "lcr": 46,
          "critpt": 0.3
        },
        "multimodalGrounded": {
          "mmmu": 75.1,
          "mmmuPro": 63,
          "charxiv": 52.7,
          "aaMmmuPro": 63.2
        },
        "knowledge": {
          "artificialAnalysis": 37.16,
          "aaGpqaDiamond": 76.1,
          "aaHle": 11.4,
          "aaOmniscienceIndex": -4,
          "omniscienceAccuracy": 8.9,
          "omniscienceHallucinationRate": 14.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 73.9
        },
        "math": {}
      }
    },
    {
      "slug": "laguna-xs-2",
      "canonicalModelKey": "laguna-xs-2",
      "model": "Laguna XS.2",
      "creator": "Poolside",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "131K",
      "contextWindowTokens": 131000,
      "displayScore": 37,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/laguna-xs-2",
      "markdownUrl": "https://benchlm.ai/md/models/laguna-xs-2.md",
      "id": 147,
      "releaseDate": "2026-04-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "laguna",
        "familyName": "Laguna",
        "variantType": "xs-2",
        "snapshotLabel": null,
        "baseFamilyModelKey": "laguna-m-1",
        "relatedModelKeys": [
          "laguna-m-1"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 37,
        "overallScore": 37,
        "rawOverallScore": 37,
        "verifiedDisplayScore": 45,
        "displayCategoryScores": {
          "agentic": 10,
          "coding": 66.9,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": 35.7,
          "coding": 55.1,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 5,
        "verifiedBenchmarkCount": 5,
        "rankableBenchmarkCount": 5,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "terminalBench2": 35.7
        },
        "coding": {
          "sweVerified": 69.9,
          "sweMultilingual": 57.7,
          "swePro": 46.3,
          "terminalBench2": 35.7
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gemma-4-e4b",
      "canonicalModelKey": "gemma-4-e4b",
      "model": "Gemma 4 E4B",
      "creator": "Google",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 37,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gemma-4-e4b",
      "markdownUrl": "https://benchlm.ai/md/models/gemma-4-e4b.md",
      "id": 139,
      "releaseDate": "2026-04-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemma-4",
        "familyName": "Gemma 4",
        "variantType": "e4b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemma-4-31b",
        "relatedModelKeys": [
          "gemma-4-31b",
          "gemma-4-26b-a4b",
          "gemma-4-e2b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 37,
        "overallScore": 44,
        "rawOverallScore": 44,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 41.4,
          "reasoning": null,
          "multimodalGrounded": 29.1,
          "knowledge": 48.3,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 6,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 6.92,
          "tau2Bench": 20.8,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 303
        },
        "coding": {
          "aaCodingIndex": 13.7,
          "terminalBenchHard": 8.3,
          "aaSciCode": 24.4
        },
        "reasoning": {
          "lcr": 30.7,
          "critpt": 0.6
        },
        "multimodalGrounded": {
          "aaMmmuPro": 51.4
        },
        "knowledge": {
          "gpqa": 58.6,
          "mmluPro": 69.4,
          "artificialAnalysis": 18.76,
          "aaGpqaDiamond": 57.6,
          "aaHle": 3.7,
          "aaOmniscienceIndex": -20,
          "omniscienceAccuracy": 8.6,
          "omniscienceHallucinationRate": 31.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 44.2
        },
        "math": {}
      }
    },
    {
      "slug": "ling-2-6-flash",
      "canonicalModelKey": "ling-2-6-flash",
      "model": "Ling 2.6 Flash",
      "creator": "InclusionAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 36,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ling-2-6-flash",
      "markdownUrl": "https://benchlm.ai/md/models/ling-2-6-flash.md",
      "id": 153,
      "releaseDate": "2026-04-21",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ling-2-6",
        "familyName": "Ling 2.6",
        "variantType": "flash",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ling-2-6-flash",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 36,
        "overallScore": 36,
        "rawOverallScore": 36,
        "verifiedDisplayScore": 41,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 44.9,
          "multilingual": null,
          "instructionFollowing": 14.6,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 27,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 59,
          "multilingual": null,
          "instructionFollowing": 57,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 115
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 3,
        "verifiedBenchmarkCount": 3,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau2Bench": 86,
          "gdpvalAa": 785,
          "aaAgenticIndex": 38.06,
          "gdpvalAaNormalized": 14.2
        },
        "coding": {
          "sciCode": 27,
          "aaCodingIndex": 23.17,
          "terminalBenchHard": 21.2,
          "aaSciCode": 27.1
        },
        "reasoning": {
          "lcr": 25,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 26.16,
          "gpqa": 59,
          "aaGpqaDiamond": 59.3,
          "aaHle": 6.2,
          "aaOmniscienceIndex": -65.7,
          "omniscienceAccuracy": 15.4,
          "omniscienceHallucinationRate": 95.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifBench": 57,
          "aaIfBench": 57.4
        },
        "math": {}
      }
    },
    {
      "slug": "granite-4-0-1b",
      "canonicalModelKey": "granite-4-0-1b",
      "model": "Granite-4.0-1B",
      "creator": "IBM",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 32,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/granite-4-0-1b",
      "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-1b.md",
      "id": 157,
      "releaseDate": "2025-10-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "granite-4-0-1b",
        "familyName": "Granite 4.0 1B",
        "variantType": "dense",
        "snapshotLabel": null,
        "baseFamilyModelKey": "granite-4-0-h-1b",
        "relatedModelKeys": [
          "granite-4-0-h-1b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 32,
        "overallScore": 42,
        "rawOverallScore": 42,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 42.2,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "multilingual": 103,
          "instructionFollowing": 93
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 7,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 7.6,
          "tau2Bench": 22.8,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 255
        },
        "coding": {
          "aaCodingIndex": 2.89,
          "terminalBenchHard": 0,
          "aaSciCode": 8.7
        },
        "reasoning": {
          "lcr": 4,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 7.34,
          "aaGpqaDiamond": 28.1,
          "aaHle": 5.1,
          "aaOmniscienceIndex": -81.8,
          "omniscienceAccuracy": 6.1,
          "omniscienceHallucinationRate": 93.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 20.5
        },
        "math": {}
      }
    },
    {
      "slug": "deepseek-v4-flash-base",
      "canonicalModelKey": "deepseek-v4-flash-base",
      "model": "DeepSeek V4 Flash Base",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 29,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/deepseek-v4-flash-base",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-base.md",
      "id": 110,
      "releaseDate": "2026-04-24",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-v4",
        "familyName": "DeepSeek V4",
        "variantType": "base",
        "snapshotLabel": "flash-base",
        "baseFamilyModelKey": "deepseek-v4-pro-max",
        "relatedModelKeys": [
          "deepseek-v4-pro-base",
          "deepseek-v4-flash",
          "deepseek-v4-flash-high",
          "deepseek-v4-flash-max",
          "deepseek-v4-pro",
          "deepseek-v4-pro-high",
          "deepseek-v4-pro-max"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 29,
        "overallScore": 29,
        "rawOverallScore": 29,
        "verifiedDisplayScore": 55,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 11,
          "multimodalGrounded": null,
          "knowledge": 30.1,
          "multilingual": 71.4,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 44.7,
          "multimodalGrounded": null,
          "knowledge": 52.2,
          "multilingual": 85.7,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "knowledge": 85,
          "multilingual": 29
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": true,
          "multilingual": true,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 24,
        "verifiedBenchmarkCount": 24,
        "rankableBenchmarkCount": 24,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 2
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "bigCodeBench": 56.8,
          "humaneval": 69.5
        },
        "reasoning": {
          "bbh": 86.9,
          "drop": 88.6,
          "hellaswag": 85.7,
          "winogrande": 79.5,
          "cluewsc": 82.2,
          "longBenchV2": 44.7
        },
        "multimodalGrounded": {},
        "knowledge": {
          "agieval": 82.6,
          "mmlu": 88.7,
          "mmluRedux": 89.4,
          "mmluPro": 68.3,
          "mmmlu": 88.8,
          "cEval": 92.1,
          "cmmlu": 90.4,
          "multiLoKo": 42.2,
          "simpleQa": 30.1,
          "superGpqa": 46.5,
          "factsParametric": 33.9,
          "triviaQa": 82.8
        },
        "multilingual": {
          "mgsm": 85.7
        },
        "instructionFollowing": {},
        "math": {
          "gsm8k": 90.8,
          "mathBenchmark": 57.4,
          "cmath": 93.6
        }
      }
    },
    {
      "slug": "qwen3-5-flash",
      "canonicalModelKey": "qwen3-5-flash",
      "model": "Qwen3.5 Flash",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 28,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/qwen3-5-flash",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-flash.md",
      "id": 94,
      "releaseDate": "2026-03-04",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-5-flash",
        "familyName": "Qwen3.5 Flash",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-5-flash",
        "relatedModelKeys": [
          "qwen3-5-35b-a3b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 28,
        "overallScore": 11,
        "rawOverallScore": 11,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": 16.5,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 46.5,
          "math": 10.7
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 88
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "ternary-bonsai-1-7b",
      "canonicalModelKey": "ternary-bonsai-1-7b",
      "model": "Ternary Bonsai 1.7B",
      "creator": "Prism ML",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 28,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ternary-bonsai-1-7b",
      "markdownUrl": "https://benchlm.ai/md/models/ternary-bonsai-1-7b.md",
      "id": 109,
      "releaseDate": "2026-04-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ternary-bonsai",
        "familyName": "Ternary Bonsai",
        "variantType": "1-7b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ternary-bonsai-8b",
        "relatedModelKeys": [
          "ternary-bonsai-8b",
          "ternary-bonsai-4b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 28,
        "overallScore": 28,
        "rawOverallScore": 28,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 33,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 11.4,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 117
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "mellum2-12b-a2-5b-instruct",
      "canonicalModelKey": "mellum2-12b-a2-5b-instruct",
      "model": "Mellum2-12B-A2.5B-Instruct",
      "creator": "JetBrains",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 27,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mellum2-12b-a2-5b-instruct",
      "markdownUrl": "https://benchlm.ai/md/models/mellum2-12b-a2-5b-instruct.md",
      "id": 253,
      "releaseDate": "2026-05-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mellum2-12b-a2-5b",
        "familyName": "Mellum2 12B-A2.5B",
        "variantType": "instruct",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mellum2-12b-a2-5b-instruct",
        "relatedModelKeys": [
          "mellum2-12b-a2-5b-thinking"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 27,
        "overallScore": 27,
        "rawOverallScore": 27,
        "verifiedDisplayScore": 44,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 30.7,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 17.2,
          "multilingual": null,
          "instructionFollowing": 32.2,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": 37.2,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 40.9,
          "multilingual": null,
          "instructionFollowing": 75.8,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 109
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 6,
        "verifiedBenchmarkCount": 6,
        "rankableBenchmarkCount": 6,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "bfclV4": 44.2
        },
        "coding": {
          "liveCodeBench": 37.2
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "mmluRedux": 78.1,
          "gpqa": 40.9,
          "gpqaDiamond": 40.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 75.8
        },
        "math": {}
      }
    },
    {
      "slug": "claude-opus-4-6-thinking",
      "canonicalModelKey": "claude-opus-4-6-thinking",
      "model": "Claude Opus 4.6 (Adaptive)",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 24,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/claude-opus-4-6-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6-thinking.md",
      "id": 228,
      "releaseDate": "2026-02-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-opus-4-6",
        "familyName": "Claude Opus 4.6",
        "variantType": "reasoning",
        "snapshotLabel": "adaptive",
        "baseFamilyModelKey": "claude-opus-4-6",
        "relatedModelKeys": [
          "claude-opus-4-6"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 24,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 67.58,
          "apexAgentsAa": 33,
          "tau2Bench": 92.1,
          "gdpvalAaNormalized": 55.9,
          "gdpvalAa": 1619
        },
        "coding": {
          "vibeCodeBench": 53.498,
          "aaCodingIndex": 48.09,
          "terminalBenchHard": 46.2,
          "aaSciCode": 51.9
        },
        "reasoning": {
          "lcr": 70.7,
          "critpt": 12.6
        },
        "multimodalGrounded": {
          "aaMmmuPro": 75.4
        },
        "knowledge": {
          "artificialAnalysis": 52.95,
          "aaGpqaDiamond": 89.6,
          "aaHle": 36.7,
          "aaOmniscienceIndex": 13.5,
          "omniscienceAccuracy": 46.4,
          "omniscienceHallucinationRate": 61.3
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 53.1
        },
        "math": {}
      }
    },
    {
      "slug": "qwen2-5-vl-32b",
      "canonicalModelKey": "qwen2-5-vl-32b",
      "model": "Qwen2.5-VL-32B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 24,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/qwen2-5-vl-32b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-vl-32b.md",
      "id": 107,
      "releaseDate": "2025-01-26",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen2-5-vl-32b",
        "familyName": "Qwen2.5-VL-32B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen2-5-vl-32b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 24,
        "overallScore": 32,
        "rawOverallScore": 32,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": 23.7,
          "knowledge": 37,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 4,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gemma-4-e2b",
      "canonicalModelKey": "gemma-4-e2b",
      "model": "Gemma 4 E2B",
      "creator": "Google",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 24,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gemma-4-e2b",
      "markdownUrl": "https://benchlm.ai/md/models/gemma-4-e2b.md",
      "id": 161,
      "releaseDate": "2026-04-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gemma-4",
        "familyName": "Gemma 4",
        "variantType": "e2b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gemma-4-31b",
        "relatedModelKeys": [
          "gemma-4-31b",
          "gemma-4-26b-a4b",
          "gemma-4-e4b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 24,
        "overallScore": 29,
        "rawOverallScore": 29,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": 31.1,
          "reasoning": null,
          "multimodalGrounded": 14.4,
          "knowledge": 25.2,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 6,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 6.92,
          "tau2Bench": 20.8,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 270
        },
        "coding": {
          "aaCodingIndex": 9,
          "terminalBenchHard": 3,
          "aaSciCode": 20.9
        },
        "reasoning": {
          "lcr": 15,
          "critpt": 0
        },
        "multimodalGrounded": {
          "aaMmmuPro": 44.6
        },
        "knowledge": {
          "gpqa": 43.4,
          "mmluPro": 60,
          "artificialAnalysis": 15.21,
          "aaGpqaDiamond": 43.3,
          "aaHle": 4.8,
          "aaOmniscienceIndex": -24,
          "omniscienceAccuracy": 6.7,
          "omniscienceHallucinationRate": 32.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 38
        },
        "math": {}
      }
    },
    {
      "slug": "bonsai-1-7b",
      "canonicalModelKey": "bonsai-1-7b",
      "model": "1-bit Bonsai 1.7B",
      "creator": "Prism ML",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 24,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/bonsai-1-7b",
      "markdownUrl": "https://benchlm.ai/md/models/bonsai-1-7b.md",
      "id": 160,
      "releaseDate": "2026-03-31",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "bonsai",
        "familyName": "1-bit Bonsai",
        "variantType": "1-7b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "bonsai-8b",
        "relatedModelKeys": [
          "bonsai-8b",
          "bonsai-4b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 24,
        "overallScore": 24,
        "rawOverallScore": 24,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 24,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 128
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 4,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "claude-opus-4-7",
      "canonicalModelKey": "claude-opus-4-7",
      "model": "Claude Opus 4.7",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 23,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/claude-opus-4-7",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-7.md",
      "id": 174,
      "releaseDate": "2026-04-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-opus-4-7",
        "familyName": "Claude Opus 4.7",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "claude-opus-4-7",
        "relatedModelKeys": [
          "claude-opus-4-7-max",
          "claude-opus-4-6",
          "claude-opus-4-5"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "claude-opus-4-6"
      },
      "scores": {
        "displayScore": 23,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 2,
        "verifiedBenchmarkCount": 2,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 64.64,
          "tau2Bench": 74,
          "gdpvalAaNormalized": 58.6,
          "gdpvalAa": 1672,
          "gertLabs": 65.59
        },
        "coding": {
          "vibeCodeBench": 71.003,
          "reactNativeEvals": 82.8,
          "aaCodingIndex": 53.07,
          "terminalBenchHard": 54.5,
          "aaSciCode": 50.1
        },
        "reasoning": {
          "lcr": 67,
          "critpt": 5.1
        },
        "multimodalGrounded": {
          "aaMmmuPro": 76.4,
          "designArenaWebsite": 1338
        },
        "knowledge": {
          "artificialAnalysis": 51.82,
          "aaGpqaDiamond": 88.5,
          "aaHle": 31.2,
          "aaOmniscienceIndex": 14.2,
          "omniscienceAccuracy": 43.5,
          "omniscienceHallucinationRate": 51.9
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 43.6
        },
        "math": {}
      }
    },
    {
      "slug": "ternary-bonsai-4b",
      "canonicalModelKey": "ternary-bonsai-4b",
      "model": "Ternary Bonsai 4B",
      "creator": "Prism ML",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 23,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ternary-bonsai-4b",
      "markdownUrl": "https://benchlm.ai/md/models/ternary-bonsai-4b.md",
      "id": 120,
      "releaseDate": "2026-04-16",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ternary-bonsai",
        "familyName": "Ternary Bonsai",
        "variantType": "4b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ternary-bonsai-8b",
        "relatedModelKeys": [
          "ternary-bonsai-8b",
          "ternary-bonsai-1-7b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 23,
        "overallScore": 23,
        "rawOverallScore": 23,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 24,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 18.7,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 114
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "bonsai-8b",
      "canonicalModelKey": "bonsai-8b",
      "model": "1-bit Bonsai 8B",
      "creator": "Prism ML",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 23,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/bonsai-8b",
      "markdownUrl": "https://benchlm.ai/md/models/bonsai-8b.md",
      "id": 126,
      "releaseDate": "2026-03-31",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "bonsai",
        "familyName": "1-bit Bonsai",
        "variantType": "8b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "bonsai-8b",
        "relatedModelKeys": [
          "bonsai-4b",
          "bonsai-1-7b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 23,
        "overallScore": 23,
        "rawOverallScore": 23,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 31.7,
          "multimodalGrounded": null,
          "knowledge": 0.5,
          "multilingual": null,
          "instructionFollowing": 46.9,
          "math": 22
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 87
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 4,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "claude-opus-4-5-thinking",
      "canonicalModelKey": "claude-opus-4-5-thinking",
      "model": "Claude Opus 4.5 Thinking",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 21,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/claude-opus-4-5-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5-thinking.md",
      "id": 230,
      "releaseDate": "2025-11-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-opus-4-5",
        "familyName": "Claude Opus 4.5",
        "variantType": "reasoning",
        "snapshotLabel": "thinking",
        "baseFamilyModelKey": "claude-opus-4-5",
        "relatedModelKeys": [
          "claude-opus-4-5"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 21,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 59.64,
          "tau2Bench": 89.5,
          "gdpvalAaNormalized": 47.3,
          "gdpvalAa": 1446
        },
        "coding": {
          "vibeCodeBench": 20.63,
          "aaCodingIndex": 47.83,
          "terminalBenchHard": 47,
          "aaSciCode": 49.5
        },
        "reasoning": {
          "lcr": 74,
          "critpt": 4.6
        },
        "multimodalGrounded": {
          "aaMmmuPro": 74,
          "designArenaWebsite": 1292
        },
        "knowledge": {
          "artificialAnalysis": 49.73,
          "aaGpqaDiamond": 86.6,
          "aaHle": 28.4,
          "aaOmniscienceIndex": 13.3,
          "omniscienceAccuracy": 45.7,
          "omniscienceHallucinationRate": 59.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 58
        },
        "math": {}
      }
    },
    {
      "slug": "glm-5-turbo",
      "canonicalModelKey": "glm-5-turbo",
      "model": "GLM-5-Turbo",
      "creator": "Z.AI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 20,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/glm-5-turbo",
      "markdownUrl": "https://benchlm.ai/md/models/glm-5-turbo.md",
      "id": 204,
      "releaseDate": "2026-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-5",
        "familyName": "GLM-5",
        "variantType": "turbo",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-5",
        "relatedModelKeys": [
          "glm-5",
          "glm-5-reasoning"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 20,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "clawEval": 55.8,
          "aaAgenticIndex": 66.13,
          "tau2Bench": 98.5,
          "gdpvalAaNormalized": 49.7,
          "gdpvalAa": 1493
        },
        "coding": {
          "aaCodingIndex": 36.77,
          "terminalBenchHard": 33.3,
          "aaSciCode": 43.6
        },
        "reasoning": {
          "lcr": 60.7,
          "critpt": 0.3
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1322
        },
        "knowledge": {
          "artificialAnalysis": 46.76,
          "aaGpqaDiamond": 84.7,
          "aaHle": 25.4,
          "aaOmniscienceIndex": -15.1,
          "omniscienceAccuracy": 29,
          "omniscienceHallucinationRate": 62.2
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 73.2
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-5-2-instant",
      "canonicalModelKey": "gpt-5-2-instant",
      "model": "GPT-5.2 Instant",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 19,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-2-instant",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-instant.md",
      "id": 179,
      "releaseDate": "2025-12-11",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-2",
        "familyName": "GPT-5.2",
        "variantType": "instant",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-2",
        "relatedModelKeys": [
          "gpt-5-2",
          "gpt-5-2-pro"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 19,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 32,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-2-pro",
      "canonicalModelKey": "gpt-5-2-pro",
      "model": "GPT-5.2 Pro",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 19,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-2-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-pro.md",
      "id": 177,
      "releaseDate": "2025-12-11",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-2",
        "familyName": "GPT-5.2",
        "variantType": "pro",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-2",
        "relatedModelKeys": [
          "gpt-5-2",
          "gpt-5-2-instant"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 19,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 31,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-3-instant",
      "canonicalModelKey": "gpt-5-3-instant",
      "model": "GPT-5.3 Instant",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 18,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-3-instant",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-instant.md",
      "id": 178,
      "releaseDate": "2026-03-03",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-3",
        "familyName": "GPT-5.3",
        "variantType": "instant",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-3-instant",
        "relatedModelKeys": [
          "gpt-5-3-codex"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 18,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 35,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-3-codex-spark",
      "canonicalModelKey": "gpt-5-3-codex-spark",
      "model": "GPT-5.3-Codex-Spark",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 17,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-3-codex-spark",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex-spark.md",
      "id": 180,
      "releaseDate": "2026-02-12",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-3-codex",
        "familyName": "GPT-5.3 Codex",
        "variantType": "spark",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-3-codex",
        "relatedModelKeys": [
          "gpt-5-3-codex"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 17,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 49,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-1-codex",
      "canonicalModelKey": "gpt-5-1-codex",
      "model": "GPT-5.1-Codex",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 17,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-1-codex",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex.md",
      "id": 231,
      "releaseDate": "2025-10-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5-1-codex",
        "familyName": "GPT-5.1-Codex",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-1-codex",
        "relatedModelKeys": [
          "gpt-5-1-codex-max"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 17,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 50.68,
          "tau2Bench": 83,
          "gdpvalAaNormalized": 34.5,
          "gdpvalAa": 1191,
          "gertLabs": 49.68
        },
        "coding": {
          "vibeCodeBench": 13.115,
          "aaCodingIndex": 36.62,
          "terminalBenchHard": 34.8,
          "aaSciCode": 40.2
        },
        "reasoning": {
          "lcr": 67.3,
          "critpt": 5.7
        },
        "multimodalGrounded": {
          "aaMmmuPro": 72.5,
          "designArenaWebsite": 1206
        },
        "knowledge": {
          "artificialAnalysis": 43.11,
          "aaGpqaDiamond": 86,
          "aaHle": 23.4,
          "aaOmniscienceIndex": -6,
          "omniscienceAccuracy": 39.2,
          "omniscienceHallucinationRate": 74.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 70
        },
        "math": {}
      }
    },
    {
      "slug": "bonsai-4b",
      "canonicalModelKey": "bonsai-4b",
      "model": "1-bit Bonsai 4B",
      "creator": "Prism ML",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 17,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/bonsai-4b",
      "markdownUrl": "https://benchlm.ai/md/models/bonsai-4b.md",
      "id": 148,
      "releaseDate": "2026-03-31",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "bonsai",
        "familyName": "1-bit Bonsai",
        "variantType": "4b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "bonsai-8b",
        "relatedModelKeys": [
          "bonsai-8b",
          "bonsai-1-7b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 17,
        "overallScore": 17,
        "rawOverallScore": 17,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": 18.1,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": 9.5,
          "math": 21.5
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 118
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 4,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "grok-4-1-fast-reasoning",
      "canonicalModelKey": "grok-4-1-fast-reasoning",
      "model": "Grok 4.1 Fast (Reasoning)",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 16,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/grok-4-1-fast-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast-reasoning.md",
      "id": 235,
      "releaseDate": "2025-11-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-4-1-fast",
        "familyName": "Grok 4.1 Fast",
        "variantType": "reasoning",
        "snapshotLabel": "reasoning",
        "baseFamilyModelKey": "grok-4-1-fast",
        "relatedModelKeys": [
          "grok-4-1-fast"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 16,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 49.31,
          "tau2Bench": 93.3,
          "gdpvalAaNormalized": 27.3,
          "gdpvalAa": 1045
        },
        "coding": {
          "vibeCodeBench": 1.2,
          "aaCodingIndex": 30.9,
          "terminalBenchHard": 24.2,
          "aaSciCode": 44.2
        },
        "reasoning": {
          "lcr": 68,
          "critpt": 2.9
        },
        "multimodalGrounded": {
          "aaMmmuPro": 63.3
        },
        "knowledge": {
          "artificialAnalysis": 38.61,
          "aaGpqaDiamond": 85.3,
          "aaHle": 17.6,
          "aaOmniscienceIndex": -28.7,
          "omniscienceAccuracy": 25.3,
          "omniscienceHallucinationRate": 72.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 52.7
        },
        "math": {}
      }
    },
    {
      "slug": "glm-4-6",
      "canonicalModelKey": "glm-4-6",
      "model": "GLM-4.6",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 14,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/glm-4-6",
      "markdownUrl": "https://benchlm.ai/md/models/glm-4-6.md",
      "id": 234,
      "releaseDate": "2025-09-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-4-6",
        "familyName": "GLM-4.6",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-4-6",
        "relatedModelKeys": [
          "glm-4-7"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 14,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 42.89,
          "tau2Bench": 76.9,
          "gdpvalAaNormalized": 24.3,
          "gdpvalAa": 985
        },
        "coding": {
          "vibeCodeBench": 3.09,
          "aaCodingIndex": 30.23,
          "terminalBenchHard": 28.8,
          "aaSciCode": 33.1
        },
        "reasoning": {
          "lcr": 26.3,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 30.24,
          "aaGpqaDiamond": 63.2,
          "aaHle": 5.2,
          "aaOmniscienceIndex": -31.6,
          "omniscienceAccuracy": 20.8,
          "omniscienceHallucinationRate": 66.1
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 36.7
        },
        "math": {}
      }
    },
    {
      "slug": "grok-4-fast-reasoning",
      "canonicalModelKey": "grok-4-fast-reasoning",
      "model": "Grok 4 Fast (Reasoning)",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 14,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/grok-4-fast-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-fast-reasoning.md",
      "id": 236,
      "releaseDate": "2025-09-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-4-fast",
        "familyName": "Grok 4 Fast",
        "variantType": "reasoning",
        "snapshotLabel": "reasoning",
        "baseFamilyModelKey": "grok-4-fast-reasoning",
        "relatedModelKeys": [
          "grok-4"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 14,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 39.51,
          "tau2Bench": 65.8,
          "gdpvalAaNormalized": 25.7,
          "gdpvalAa": 1015
        },
        "coding": {
          "vibeCodeBench": 0,
          "aaCodingIndex": 27.36,
          "terminalBenchHard": 18.9,
          "aaSciCode": 44.2
        },
        "reasoning": {
          "lcr": 64.7,
          "critpt": 2.9
        },
        "multimodalGrounded": {
          "aaMmmuPro": 61.8
        },
        "knowledge": {
          "artificialAnalysis": 35.06,
          "aaGpqaDiamond": 84.7,
          "aaHle": 17,
          "aaOmniscienceIndex": -28.4,
          "omniscienceAccuracy": 22.6,
          "omniscienceHallucinationRate": 66
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 50.5
        },
        "math": {}
      }
    },
    {
      "slug": "trinity-large-preview",
      "canonicalModelKey": "trinity-large-preview",
      "model": "Trinity-Large-Preview",
      "creator": "Arcee AI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "512K",
      "contextWindowTokens": 512000,
      "displayScore": 12,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/trinity-large-preview",
      "markdownUrl": "https://benchlm.ai/md/models/trinity-large-preview.md",
      "id": 175,
      "releaseDate": "2026-01-27",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "trinity-large",
        "familyName": "Trinity Large",
        "variantType": "preview",
        "snapshotLabel": "preview",
        "baseFamilyModelKey": "trinity-large-thinking",
        "relatedModelKeys": [
          "trinity-large-thinking"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 12,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 5,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 42.61,
          "tau2Bench": 90.1,
          "gdpvalAaNormalized": 18.2,
          "gdpvalAa": 864
        },
        "coding": {
          "aaCodingIndex": 27.19,
          "terminalBenchHard": 22.7,
          "aaSciCode": 36.1
        },
        "reasoning": {
          "lcr": 33,
          "critpt": 0.9
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1181
        },
        "knowledge": {
          "mmlu": 87.2,
          "mmluProArcee": 75.2,
          "gpqaDiamond": 63.3,
          "artificialAnalysis": 31.87,
          "aaGpqaDiamond": 75.2,
          "aaHle": 14.7,
          "aaOmniscienceIndex": -44.2,
          "omniscienceAccuracy": 22.8,
          "omniscienceHallucinationRate": 86.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 56.3
        },
        "math": {
          "aime2025Arcee": 24
        }
      }
    },
    {
      "slug": "trinity-large-thinking",
      "canonicalModelKey": "trinity-large-thinking",
      "model": "Trinity-Large-Thinking",
      "creator": "Arcee AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "512K",
      "contextWindowTokens": 512000,
      "displayScore": 12,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/trinity-large-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/trinity-large-thinking.md",
      "id": 116,
      "releaseDate": "2026-03-10",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "trinity-large",
        "familyName": "Trinity Large",
        "variantType": "thinking",
        "snapshotLabel": null,
        "baseFamilyModelKey": "trinity-large-thinking",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 12,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 129
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 4,
        "verifiedBenchmarkCount": 4,
        "rankableBenchmarkCount": 8,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "tau2Bench": 90.1,
          "aaAgenticIndex": 42.61,
          "gdpvalAaNormalized": 18.2,
          "gdpvalAa": 864,
          "gertLabs": 32.55
        },
        "coding": {
          "sweVerifiedArcee": 63.2,
          "aaCodingIndex": 27.19,
          "terminalBenchHard": 22.7,
          "aaSciCode": 36.1
        },
        "reasoning": {
          "lcr": 33,
          "critpt": 0.9
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1181
        },
        "knowledge": {
          "gpqaDiamond": 76.3,
          "mmluProArcee": 83.4,
          "artificialAnalysis": 31.87,
          "aaGpqaDiamond": 75.2,
          "aaHle": 14.7,
          "aaOmniscienceIndex": -44.2,
          "omniscienceAccuracy": 22.8,
          "omniscienceHallucinationRate": 86.6
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 56.3
        },
        "math": {
          "aime2025Arcee": 96.3
        }
      }
    },
    {
      "slug": "qwen3-max",
      "canonicalModelKey": "qwen3-max",
      "model": "Qwen3 Max",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 12,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/qwen3-max",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-max.md",
      "id": 233,
      "releaseDate": "2026-04-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-max",
        "familyName": "Qwen3 Max",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-max",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 12,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 43.01,
          "tau2Bench": 74.3,
          "gdpvalAaNormalized": 26.8,
          "gdpvalAa": 1037,
          "gertLabs": 43.74
        },
        "coding": {
          "vibeCodeBench": 3.506,
          "aaCodingIndex": 26.41,
          "terminalBenchHard": 20.5,
          "aaSciCode": 38.3
        },
        "reasoning": {
          "lcr": 46.7,
          "critpt": 0
        },
        "multimodalGrounded": {
          "designArenaWebsite": 1164
        },
        "knowledge": {
          "artificialAnalysis": 31.38,
          "aaGpqaDiamond": 76.4,
          "aaHle": 11.1,
          "aaOmniscienceIndex": -43.1,
          "omniscienceAccuracy": 24.4,
          "omniscienceHallucinationRate": 89.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 44.1
        },
        "math": {}
      }
    },
    {
      "slug": "glm-4-7-flash",
      "canonicalModelKey": "glm-4-7-flash",
      "model": "GLM-4.7-Flash",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 11,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/glm-4-7-flash",
      "markdownUrl": "https://benchlm.ai/md/models/glm-4-7-flash.md",
      "id": 186,
      "releaseDate": "2025-10-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "glm-4-7-flash",
        "familyName": "GLM-4.7-Flash",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "glm-4-7-flash",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 11,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 50,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "mercury-2",
      "canonicalModelKey": "mercury-2",
      "model": "Mercury 2",
      "creator": "Inception",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 11,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/mercury-2",
      "markdownUrl": "https://benchlm.ai/md/models/mercury-2.md",
      "id": 184,
      "releaseDate": "2026-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "mercury-2",
        "familyName": "Mercury 2",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "mercury-2",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 11,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 48,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "lfm2-5-350m",
      "canonicalModelKey": "lfm2-5-350m",
      "model": "LFM2.5-350M",
      "creator": "LiquidAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 11,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-5-350m",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-350m.md",
      "id": 159,
      "releaseDate": "2026-03-31",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "lfm2-5-350m",
        "familyName": "LFM2.5-350M",
        "variantType": "instruct",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-5-350m",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 11,
        "overallScore": 11,
        "rawOverallScore": 11,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 0.5,
          "multilingual": null,
          "instructionFollowing": 36.5,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 105
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "nemotron-3-super-120b-a12b",
      "canonicalModelKey": "nemotron-3-super-120b-a12b",
      "model": "Nemotron 3 Super 120B A12B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 10,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/nemotron-3-super-120b-a12b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-120b-a12b.md",
      "id": 187,
      "releaseDate": "2026-01-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "nemotron-3-super-120b-a12b",
        "familyName": "Nemotron 3 Super 120B A12B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "nemotron-3-super-120b-a12b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 10,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 50,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "granite-4-0-h-1b",
      "canonicalModelKey": "granite-4-0-h-1b",
      "model": "Granite-4.0-H-1B",
      "creator": "IBM",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 9,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/granite-4-0-h-1b",
      "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-h-1b.md",
      "id": 152,
      "releaseDate": "2025-10-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "granite-4-0-1b",
        "familyName": "Granite 4.0 1B",
        "variantType": "hybrid",
        "snapshotLabel": null,
        "baseFamilyModelKey": "granite-4-0-h-1b",
        "relatedModelKeys": [
          "granite-4-0-1b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 9,
        "overallScore": 11,
        "rawOverallScore": 11,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 0.1,
          "multilingual": null,
          "instructionFollowing": 38,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "multilingual": 104,
          "instructionFollowing": 99
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 7,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 6.53,
          "tau2Bench": 19.6,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 268
        },
        "coding": {
          "aaCodingIndex": 2.74,
          "terminalBenchHard": 0,
          "aaSciCode": 8.2
        },
        "reasoning": {
          "lcr": 6.3,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 7.99,
          "aaGpqaDiamond": 26.3,
          "aaHle": 5,
          "aaOmniscienceIndex": -73.6,
          "omniscienceAccuracy": 5.3,
          "omniscienceHallucinationRate": 83.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 26.2
        },
        "math": {}
      }
    },
    {
      "slug": "seed-1-6",
      "canonicalModelKey": "seed-1-6",
      "model": "Seed 1.6",
      "creator": "ByteDance",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 7,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/seed-1-6",
      "markdownUrl": "https://benchlm.ai/md/models/seed-1-6.md",
      "id": 183,
      "releaseDate": "2025-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "seed-1-6",
        "familyName": "Seed 1.6",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "seed-1-6",
        "relatedModelKeys": [
          "seed-1-6-flash"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 7,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 32,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "qwen2-5-coder-32b-instruct",
      "canonicalModelKey": "qwen2-5-coder-32b-instruct",
      "model": "Qwen2.5 Coder 32B Instruct",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 6,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/qwen2-5-coder-32b-instruct",
      "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-coder-32b-instruct.md",
      "id": 216,
      "releaseDate": "2025-01-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen2-5-coder",
        "familyName": "Qwen2.5 Coder",
        "variantType": "32b-instruct",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen2-5-coder-32b-instruct",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 6,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "aaSciCode": 27.1
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 12.87,
          "aaGpqaDiamond": 41.7,
          "aaHle": 3.8
        },
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "seed-2-0-lite",
      "canonicalModelKey": "seed-2-0-lite",
      "model": "Seed-2.0-Lite",
      "creator": "ByteDance",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 6,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/seed-2-0-lite",
      "markdownUrl": "https://benchlm.ai/md/models/seed-2-0-lite.md",
      "id": 185,
      "releaseDate": "2026-03-10",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "seed-2-0",
        "familyName": "Seed 2.0",
        "variantType": "lite",
        "snapshotLabel": null,
        "baseFamilyModelKey": "seed-2-0-lite",
        "relatedModelKeys": [
          "seed-2-0-mini"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 6,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 32,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "deepseek-r1-distill-qwen-32b",
      "canonicalModelKey": "deepseek-r1-distill-qwen-32b",
      "model": "DeepSeek R1 Distill Qwen 32B",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 6,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/deepseek-r1-distill-qwen-32b",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1-distill-qwen-32b.md",
      "id": 217,
      "releaseDate": "2025-01-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "deepseek-r1-distill",
        "familyName": "DeepSeek R1 Distill",
        "variantType": "qwen-32b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "deepseek-r1-distill-qwen-32b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 6,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "aaSciCode": 37.6
        },
        "reasoning": {
          "lcr": 9.7
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 17.17,
          "aaGpqaDiamond": 61.5,
          "aaHle": 5.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 22.9
        },
        "math": {}
      }
    },
    {
      "slug": "ministral-3-14b-reasoning",
      "canonicalModelKey": "ministral-3-14b-reasoning",
      "model": "Ministral 3 14B (Reasoning)",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 5,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ministral-3-14b-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/ministral-3-14b-reasoning.md",
      "id": 188,
      "releaseDate": "2025-12-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ministral-3-14b",
        "familyName": "Ministral 3 14B",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ministral-3-14b",
        "relatedModelKeys": [
          "ministral-3-14b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 5,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 49,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "ministral-3-14b",
      "canonicalModelKey": "ministral-3-14b",
      "model": "Ministral 3 14B",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 5,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ministral-3-14b",
      "markdownUrl": "https://benchlm.ai/md/models/ministral-3-14b.md",
      "id": 192,
      "releaseDate": "2025-12-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ministral-3-14b",
        "familyName": "Ministral 3 14B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ministral-3-14b",
        "relatedModelKeys": [
          "ministral-3-14b-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 5,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 49,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "aion-2-0",
      "canonicalModelKey": "aion-2-0",
      "model": "Aion-2.0",
      "creator": "Aion Labs",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 5,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/aion-2-0",
      "markdownUrl": "https://benchlm.ai/md/models/aion-2-0.md",
      "id": 190,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "aion-2-0",
        "familyName": "Aion-2.0",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "aion-2-0",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 5,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 32,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "seed-1-6-flash",
      "canonicalModelKey": "seed-1-6-flash",
      "model": "Seed 1.6 Flash",
      "creator": "ByteDance",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 4,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/seed-1-6-flash",
      "markdownUrl": "https://benchlm.ai/md/models/seed-1-6-flash.md",
      "id": 191,
      "releaseDate": "2025-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "seed-1-6",
        "familyName": "Seed 1.6",
        "variantType": "flash",
        "snapshotLabel": null,
        "baseFamilyModelKey": "seed-1-6",
        "relatedModelKeys": [
          "seed-1-6"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 4,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 32,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "minimax-m1-80k",
      "canonicalModelKey": "minimax-m1-80k",
      "model": "MiniMax M1 80k",
      "creator": "MiniMax",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "80K",
      "contextWindowTokens": 80000,
      "displayScore": 4,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/minimax-m1-80k",
      "markdownUrl": "https://benchlm.ai/md/models/minimax-m1-80k.md",
      "id": 195,
      "releaseDate": "2025-01-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "minimax-m1-80k",
        "familyName": "MiniMax M1 80k",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "minimax-m1-80k",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 4,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 48,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "solar-pro-2",
      "canonicalModelKey": "solar-pro-2",
      "model": "Solar Pro 2",
      "creator": "Upstage",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 4,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/solar-pro-2",
      "markdownUrl": "https://benchlm.ai/md/models/solar-pro-2.md",
      "id": 208,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "solar",
        "familyName": "Solar",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "solar-pro-2",
        "relatedModelKeys": [
          "solar-wbl"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 4,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 12.71,
          "tau2Bench": 31.9,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 443
        },
        "coding": {
          "aaCodingIndex": 11.29,
          "terminalBenchHard": 4.5,
          "aaSciCode": 24.8
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 13.59,
          "aaGpqaDiamond": 56.1,
          "aaHle": 3.8,
          "aaOmniscienceIndex": -61.7,
          "omniscienceAccuracy": 15.6,
          "omniscienceHallucinationRate": 91.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 33.7
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "seed-2-0-mini",
      "canonicalModelKey": "seed-2-0-mini",
      "model": "Seed-2.0-Mini",
      "creator": "ByteDance",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 3,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/seed-2-0-mini",
      "markdownUrl": "https://benchlm.ai/md/models/seed-2-0-mini.md",
      "id": 193,
      "releaseDate": "2026-03-10",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "seed-2-0",
        "familyName": "Seed 2.0",
        "variantType": "mini",
        "snapshotLabel": null,
        "baseFamilyModelKey": "seed-2-0-lite",
        "relatedModelKeys": [
          "seed-2-0-lite"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 3,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 32,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "ministral-3-8b-reasoning",
      "canonicalModelKey": "ministral-3-8b-reasoning",
      "model": "Ministral 3 8B (Reasoning)",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 3,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ministral-3-8b-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/ministral-3-8b-reasoning.md",
      "id": 197,
      "releaseDate": "2025-12-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ministral-3-8b",
        "familyName": "Ministral 3 8B",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ministral-3-8b",
        "relatedModelKeys": [
          "ministral-3-8b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 3,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 49,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "ministral-3-8b",
      "canonicalModelKey": "ministral-3-8b",
      "model": "Ministral 3 8B",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 3,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ministral-3-8b",
      "markdownUrl": "https://benchlm.ai/md/models/ministral-3-8b.md",
      "id": 199,
      "releaseDate": "2025-12-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ministral-3-8b",
        "familyName": "Ministral 3 8B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ministral-3-8b",
        "relatedModelKeys": [
          "ministral-3-8b-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 3,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 49,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "lfm2-24b-a2b",
      "canonicalModelKey": "lfm2-24b-a2b",
      "model": "LFM2-24B-A2B",
      "creator": "LiquidAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 2,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-24b-a2b",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-24b-a2b.md",
      "id": 196,
      "releaseDate": "2026-01-10",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "lfm2-24b-a2b",
        "familyName": "LFM2-24B-A2B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-24b-a2b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 2,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 48,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "lfm2-5-1-2b-thinking",
      "canonicalModelKey": "lfm2-5-1-2b-thinking",
      "model": "LFM2.5-1.2B-Thinking",
      "creator": "LiquidAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 1,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-5-1-2b-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-1-2b-thinking.md",
      "id": 198,
      "releaseDate": "2026-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "lfm2-5-1-2b",
        "familyName": "LFM2.5 1.2B",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-5-1-2b-instruct",
        "relatedModelKeys": [
          "lfm2-5-1-2b-instruct"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 1,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 48,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "ministral-3-3b-reasoning",
      "canonicalModelKey": "ministral-3-3b-reasoning",
      "model": "Ministral 3 3B (Reasoning)",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 1,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ministral-3-3b-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/ministral-3-3b-reasoning.md",
      "id": 200,
      "releaseDate": "2025-12-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ministral-3-3b",
        "familyName": "Ministral 3 3B",
        "variantType": "reasoning",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ministral-3-3b",
        "relatedModelKeys": [
          "ministral-3-3b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 1,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 49,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "ministral-3-3b",
      "canonicalModelKey": "ministral-3-3b",
      "model": "Ministral 3 3B",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 1,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/ministral-3-3b",
      "markdownUrl": "https://benchlm.ai/md/models/ministral-3-3b.md",
      "id": 202,
      "releaseDate": "2025-12-02",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "ministral-3-3b",
        "familyName": "Ministral 3 3B",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "ministral-3-3b",
        "relatedModelKeys": [
          "ministral-3-3b-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 1,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 49,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "exaone-4-0-1-2b",
      "canonicalModelKey": "exaone-4-0-1-2b",
      "model": "Exaone 4.0 1.2B",
      "creator": "LG AI Research",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 1,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/exaone-4-0-1-2b",
      "markdownUrl": "https://benchlm.ai/md/models/exaone-4-0-1-2b.md",
      "id": 205,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "exaone-4",
        "familyName": "Exaone 4.0",
        "variantType": "1-2b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "exaone-4-0-32b",
        "relatedModelKeys": [
          "exaone-4-0-32b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 1,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 6.82,
          "tau2Bench": 20.5,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 293
        },
        "coding": {
          "aaCodingIndex": 2.47,
          "terminalBenchHard": 0,
          "aaSciCode": 7.4
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 8.11,
          "aaGpqaDiamond": 42.4,
          "aaHle": 5.8,
          "aaOmniscienceIndex": -82.6,
          "omniscienceAccuracy": 4.7,
          "omniscienceHallucinationRate": 91.5
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 25.3
        },
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "step-3-5-flash",
      "canonicalModelKey": "step-3-5-flash",
      "model": "Step 3.5 Flash",
      "creator": "StepFun",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/step-3-5-flash",
      "markdownUrl": "https://benchlm.ai/md/models/step-3-5-flash.md",
      "id": 181,
      "releaseDate": "2026-01-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "step-3-5-flash",
        "familyName": "Step 3.5 Flash",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "step-3-5-flash",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 55,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "minimax-m2-5",
      "canonicalModelKey": "minimax-m2-5",
      "model": "MiniMax M2.5",
      "creator": "MiniMax",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/minimax-m2-5",
      "markdownUrl": "https://benchlm.ai/md/models/minimax-m2-5.md",
      "id": 189,
      "releaseDate": "2025-10-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "minimax-m2-5",
        "familyName": "MiniMax M2.5",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "minimax-m2-5",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 52,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "vibeCodeBench": 14.852
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "gpt-5-mini",
      "canonicalModelKey": "gpt-5-mini",
      "model": "GPT-5 mini",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-mini",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-mini.md",
      "id": 182,
      "releaseDate": "2025-08-07",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5",
        "familyName": "GPT-5",
        "variantType": "mini",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-high",
        "relatedModelKeys": [
          "gpt-5-high",
          "gpt-5-medium",
          "gpt-5-nano"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 59,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "vibeCodeBench": 14.171
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "lfm2-5-1-2b-instruct",
      "canonicalModelKey": "lfm2-5-1-2b-instruct",
      "model": "LFM2.5-1.2B-Instruct",
      "creator": "LiquidAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-5-1-2b-instruct",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-1-2b-instruct.md",
      "id": 201,
      "releaseDate": "2026-03-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "lfm2-5-1-2b",
        "familyName": "LFM2.5 1.2B",
        "variantType": "instruct",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-5-1-2b-instruct",
        "relatedModelKeys": [
          "lfm2-5-1-2b-thinking"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 48,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "lfm2-5-vl-450m",
      "canonicalModelKey": "lfm2-5-vl-450m",
      "model": "LFM2.5-VL-450M",
      "creator": "LiquidAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-5-vl-450m",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-vl-450m.md",
      "id": 168,
      "releaseDate": "2026-04-08",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "lfm2-5-vl-450m",
        "familyName": "LFM2.5-VL-450M",
        "variantType": "vl",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-5-vl-450m",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": 33,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": 21.6,
          "multilingual": null,
          "instructionFollowing": 61.2,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "instructionFollowing": 130
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 7,
        "verifiedBenchmarkCount": 7,
        "rankableBenchmarkCount": 7,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "bfclV4": 21.08
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {
          "mmmu": 32.67,
          "realWorldQa": 58.43,
          "countBench": 73.31
        },
        "knowledge": {
          "gpqa": 25.66,
          "mmluPro": 19.32
        },
        "multilingual": {},
        "instructionFollowing": {
          "ifeval": 61.16
        },
        "math": {}
      }
    },
    {
      "slug": "lfm2-5-vl-1-6b-extract",
      "canonicalModelKey": "lfm2-5-vl-1-6b-extract",
      "model": "LFM2.5-VL-1.6B-Extract",
      "creator": "LiquidAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-5-vl-1-6b-extract",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-vl-1-6b-extract.md",
      "id": 256,
      "releaseDate": "2026-05-26",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "lfm2-5-vl-extract",
        "familyName": "LFM2.5-VL Extract",
        "variantType": "1-6b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-5-vl-1-6b-extract",
        "relatedModelKeys": [
          "lfm2-5-vl-450m-extract",
          "lfm2-5-vl-450m"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 3,
        "verifiedBenchmarkCount": 3,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 2.83,
          "tau2Bench": 8.5,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 232
        },
        "coding": {
          "aaCodingIndex": 1,
          "terminalBenchHard": 0,
          "aaSciCode": 3
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {
          "liquidExtractJsonValidity": 99.6,
          "liquidExtractSchemaF1": 99.6,
          "liquidExtractVlmJudge": 90.6,
          "aaMmmuPro": 26.5
        },
        "knowledge": {
          "artificialAnalysis": 6.18,
          "aaGpqaDiamond": 28.9,
          "aaHle": 5.1,
          "aaOmniscienceIndex": -83.9,
          "omniscienceAccuracy": 5.2,
          "omniscienceHallucinationRate": 94
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 33.1
        },
        "math": {}
      }
    },
    {
      "slug": "lfm2-5-vl-450m-extract",
      "canonicalModelKey": "lfm2-5-vl-450m-extract",
      "model": "LFM2.5-VL-450M-Extract",
      "creator": "LiquidAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-5-vl-450m-extract",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-vl-450m-extract.md",
      "id": 255,
      "releaseDate": "2026-05-26",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "lfm2-5-vl-extract",
        "familyName": "LFM2.5-VL Extract",
        "variantType": "450m",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-5-vl-1-6b-extract",
        "relatedModelKeys": [
          "lfm2-5-vl-1-6b-extract",
          "lfm2-5-vl-450m"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 3,
        "verifiedBenchmarkCount": 3,
        "rankableBenchmarkCount": 3,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {
          "liquidExtractJsonValidity": 98.9,
          "liquidExtractSchemaF1": 98.8,
          "liquidExtractVlmJudge": 84.5
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "kimi-k2-7-code",
      "canonicalModelKey": "kimi-k2-7-code",
      "model": "Kimi K2.7 Code",
      "creator": "Moonshot AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/kimi-k2-7-code",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-7-code.md",
      "id": 258,
      "releaseDate": "2026-06-12",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "kimi-k2-7-code",
        "familyName": "Kimi K2.7 Code",
        "variantType": "code",
        "snapshotLabel": null,
        "baseFamilyModelKey": "kimi-k2-7-code",
        "relatedModelKeys": [
          "kimi-2-6",
          "kimi-k2-5"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "kimi-2-6"
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "kimiClaw247": 46.9,
          "mcpAtlas": 76,
          "mcpMarkVerified": 81.1
        },
        "coding": {
          "kimiCodeBenchV2": 62,
          "programBench": 53.6,
          "mlsBenchLite": 35.1
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo3-1-35b-a3b",
      "canonicalModelKey": "holo3-1-35b-a3b",
      "model": "Holo3.1-35B-A3B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-1-35b-a3b",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-1-35b-a3b.md",
      "id": 242,
      "releaseDate": "2026-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3-1",
        "familyName": "Holo3.1",
        "variantType": "35b-a3b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-1-35b-a3b",
        "relatedModelKeys": [
          "holo3-1-35b-a3b-fp8",
          "holo3-1-35b-a3b-nvfp4",
          "holo3-1-35b-a3b-gguf",
          "holo3-1-9b",
          "holo3-1-4b",
          "holo3-1-0-8b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": "holo3-35b-a3b"
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "androidWorld": 79.3
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo3-1-4b",
      "canonicalModelKey": "holo3-1-4b",
      "model": "Holo3.1-4B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-1-4b",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-1-4b.md",
      "id": 247,
      "releaseDate": "2026-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3-1",
        "familyName": "Holo3.1",
        "variantType": "4b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-1-35b-a3b",
        "relatedModelKeys": [
          "holo3-1-35b-a3b",
          "holo3-1-35b-a3b-fp8",
          "holo3-1-35b-a3b-nvfp4",
          "holo3-1-35b-a3b-gguf",
          "holo3-1-9b",
          "holo3-1-0-8b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "holo2-4b"
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "androidWorld": 71
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo3-1-9b",
      "canonicalModelKey": "holo3-1-9b",
      "model": "Holo3.1-9B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-1-9b",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-1-9b.md",
      "id": 246,
      "releaseDate": "2026-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3-1",
        "familyName": "Holo3.1",
        "variantType": "9b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-1-35b-a3b",
        "relatedModelKeys": [
          "holo3-1-35b-a3b",
          "holo3-1-35b-a3b-fp8",
          "holo3-1-35b-a3b-nvfp4",
          "holo3-1-35b-a3b-gguf",
          "holo3-1-4b",
          "holo3-1-0-8b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "holo2-8b"
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "androidWorld": 71
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "composer-2-fast",
      "canonicalModelKey": "composer-2-fast",
      "model": "Composer 2 Fast",
      "creator": "Cursor",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/composer-2-fast",
      "markdownUrl": "https://benchlm.ai/md/models/composer-2-fast.md",
      "id": 176,
      "releaseDate": "2026-03-19",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "composer",
        "familyName": "Composer",
        "variantType": "fast",
        "snapshotLabel": null,
        "baseFamilyModelKey": "composer-2",
        "relatedModelKeys": [
          "composer-2"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "reactNativeEvals": 94.9
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "qwen3-5-plus",
      "canonicalModelKey": "qwen3-5-plus",
      "model": "Qwen3.5 Plus",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/qwen3-5-plus",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-plus.md",
      "id": 227,
      "releaseDate": "2026-03-04",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "qwen3-5-plus",
        "familyName": "Qwen3.5 Plus",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "qwen3-5-plus",
        "relatedModelKeys": [
          "qwen3-5-397b",
          "qwen3-5-397b-reasoning"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "vibeCodeBench": 15.738
        },
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "claude-haiku-4-5-thinking",
      "canonicalModelKey": "claude-haiku-4-5-thinking",
      "model": "Claude Haiku 4.5 Thinking",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/claude-haiku-4-5-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5-thinking.md",
      "id": 232,
      "releaseDate": "2025-10-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-haiku-4-5",
        "familyName": "Claude Haiku 4.5",
        "variantType": "reasoning",
        "snapshotLabel": "thinking",
        "baseFamilyModelKey": "claude-haiku-4-5",
        "relatedModelKeys": [
          "claude-haiku-4-5"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "vibeCodeBench": 11.393
        },
        "reasoning": {},
        "multimodalGrounded": {
          "designArenaWebsite": 1167
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "claude-sonnet-4-5-thinking",
      "canonicalModelKey": "claude-sonnet-4-5-thinking",
      "model": "Claude Sonnet 4.5 Thinking",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/claude-sonnet-4-5-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5-thinking.md",
      "id": 229,
      "releaseDate": "2025-09-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "claude-sonnet-4-5",
        "familyName": "Claude Sonnet 4.5",
        "variantType": "reasoning",
        "snapshotLabel": "thinking",
        "baseFamilyModelKey": "claude-sonnet-4-5",
        "relatedModelKeys": [
          "claude-sonnet-4-5"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 2,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {
          "vibeCodeBench": 22.621
        },
        "reasoning": {},
        "multimodalGrounded": {
          "designArenaWebsite": 1235
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo2-235b-a22b",
      "canonicalModelKey": "holo2-235b-a22b",
      "model": "Holo2-235B-A22B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo2-235b-a22b",
      "markdownUrl": "https://benchlm.ai/md/models/holo2-235b-a22b.md",
      "id": 223,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo2",
        "familyName": "Holo2",
        "variantType": "235b-a22b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo2-235b-a22b",
        "relatedModelKeys": [
          "holo2-30b-a3b",
          "holo2-8b",
          "holo2-4b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {
          "screenSpotPro": 70.6
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo2-30b-a3b",
      "canonicalModelKey": "holo2-30b-a3b",
      "model": "Holo2-30B-A3B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo2-30b-a3b",
      "markdownUrl": "https://benchlm.ai/md/models/holo2-30b-a3b.md",
      "id": 224,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo2",
        "familyName": "Holo2",
        "variantType": "30b-a3b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo2-235b-a22b",
        "relatedModelKeys": [
          "holo2-235b-a22b",
          "holo2-8b",
          "holo2-4b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {
          "screenSpotPro": 66.1
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo2-4b",
      "canonicalModelKey": "holo2-4b",
      "model": "Holo2-4B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo2-4b",
      "markdownUrl": "https://benchlm.ai/md/models/holo2-4b.md",
      "id": 226,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo2",
        "familyName": "Holo2",
        "variantType": "4b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo2-235b-a22b",
        "relatedModelKeys": [
          "holo2-235b-a22b",
          "holo2-30b-a3b",
          "holo2-8b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {
          "screenSpotPro": 57.2
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo2-8b",
      "canonicalModelKey": "holo2-8b",
      "model": "Holo2-8B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo2-8b",
      "markdownUrl": "https://benchlm.ai/md/models/holo2-8b.md",
      "id": 225,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo2",
        "familyName": "Holo2",
        "variantType": "8b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo2-235b-a22b",
        "relatedModelKeys": [
          "holo2-235b-a22b",
          "holo2-30b-a3b",
          "holo2-4b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 1,
        "verifiedBenchmarkCount": 1,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {
          "screenSpotPro": 58.9
        },
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo3-1-0-8b",
      "canonicalModelKey": "holo3-1-0-8b",
      "model": "Holo3.1-0.8B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-1-0-8b",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-1-0-8b.md",
      "id": 248,
      "releaseDate": "2026-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3-1",
        "familyName": "Holo3.1",
        "variantType": "0-8b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-1-35b-a3b",
        "relatedModelKeys": [
          "holo3-1-35b-a3b",
          "holo3-1-35b-a3b-fp8",
          "holo3-1-35b-a3b-nvfp4",
          "holo3-1-35b-a3b-gguf",
          "holo3-1-9b",
          "holo3-1-4b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo3-1-35b-a3b-fp8",
      "canonicalModelKey": "holo3-1-35b-a3b-fp8",
      "model": "Holo3.1-35B-A3B-FP8",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-1-35b-a3b-fp8",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-1-35b-a3b-fp8.md",
      "id": 243,
      "releaseDate": "2026-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3-1",
        "familyName": "Holo3.1",
        "variantType": "35b-a3b-fp8",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-1-35b-a3b",
        "relatedModelKeys": [
          "holo3-1-35b-a3b",
          "holo3-1-35b-a3b-nvfp4",
          "holo3-1-35b-a3b-gguf",
          "holo3-1-9b",
          "holo3-1-4b",
          "holo3-1-0-8b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "holo3-1-35b-a3b"
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo3-1-35b-a3b-gguf",
      "canonicalModelKey": "holo3-1-35b-a3b-gguf",
      "model": "Holo3.1-35B-A3B-GGUF",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-1-35b-a3b-gguf",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-1-35b-a3b-gguf.md",
      "id": 245,
      "releaseDate": "2026-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3-1",
        "familyName": "Holo3.1",
        "variantType": "35b-a3b-gguf",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-1-35b-a3b",
        "relatedModelKeys": [
          "holo3-1-35b-a3b",
          "holo3-1-35b-a3b-fp8",
          "holo3-1-35b-a3b-nvfp4",
          "holo3-1-9b",
          "holo3-1-4b",
          "holo3-1-0-8b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "holo3-1-35b-a3b"
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "holo3-1-35b-a3b-nvfp4",
      "canonicalModelKey": "holo3-1-35b-a3b-nvfp4",
      "model": "Holo3.1-35B-A3B-NVFP4",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holo3-1-35b-a3b-nvfp4",
      "markdownUrl": "https://benchlm.ai/md/models/holo3-1-35b-a3b-nvfp4.md",
      "id": 244,
      "releaseDate": "2026-06-01",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holo3-1",
        "familyName": "Holo3.1",
        "variantType": "35b-a3b-nvfp4",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holo3-1-35b-a3b",
        "relatedModelKeys": [
          "holo3-1-35b-a3b",
          "holo3-1-35b-a3b-fp8",
          "holo3-1-35b-a3b-gguf",
          "holo3-1-9b",
          "holo3-1-4b",
          "holo3-1-0-8b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": "holo3-1-35b-a3b"
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "lfm2-5-1-2b-jp-202606",
      "canonicalModelKey": "lfm2-5-1-2b-jp-202606",
      "model": "LFM2.5-1.2B-JP-202606",
      "creator": "LiquidAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/lfm2-5-1-2b-jp-202606",
      "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-1-2b-jp-202606.md",
      "id": 254,
      "releaseDate": "2026-05-26",
      "market": "Japan",
      "isRegional": true,
      "family": {
        "familyKey": "lfm2-5-1-2b",
        "familyName": "LFM2.5-1.2B",
        "variantType": "jp-202606",
        "snapshotLabel": null,
        "baseFamilyModelKey": "lfm2-5-1-2b-instruct",
        "relatedModelKeys": [
          "lfm2-5-1-2b-instruct",
          "lfm2-5-1-2b-thinking"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "grok-build-0-1",
      "canonicalModelKey": "grok-build-0-1",
      "model": "Grok Build 0.1",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/grok-build-0-1",
      "markdownUrl": "https://benchlm.ai/md/models/grok-build-0-1.md",
      "id": 173,
      "releaseDate": "2026-05-20",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "grok-build",
        "familyName": "Grok Build",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "grok-build-0-1",
        "relatedModelKeys": [
          "grok-code-fast-1"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "gertLabs": 49.15
        },
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "hy-mt1-5-1-8b-1-25bit",
      "canonicalModelKey": "hy-mt1-5-1-8b-1-25bit",
      "model": "Hy-MT1.5-1.8B-1.25bit",
      "creator": "Tencent Hunyuan",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/hy-mt1-5-1-8b-1-25bit",
      "markdownUrl": "https://benchlm.ai/md/models/hy-mt1-5-1-8b-1-25bit.md",
      "id": 237,
      "releaseDate": "2026-04-29",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "hy-mt1-5-1-8b",
        "familyName": "Hy-MT1.5-1.8B",
        "variantType": "1.25bit",
        "snapshotLabel": null,
        "baseFamilyModelKey": "hy-mt1-5-1-8b-1-25bit",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "leanstral",
      "canonicalModelKey": "leanstral",
      "model": "Leanstral",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/leanstral",
      "markdownUrl": "https://benchlm.ai/md/models/leanstral.md",
      "id": 203,
      "releaseDate": "2026-03-15",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "leanstral",
        "familyName": "Leanstral",
        "variantType": "base",
        "snapshotLabel": null,
        "baseFamilyModelKey": "leanstral",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 1,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "granite-4-0-350m",
      "canonicalModelKey": "granite-4-0-350m",
      "model": "Granite-4.0-350M",
      "creator": "IBM",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/granite-4-0-350m",
      "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-350m.md",
      "id": 171,
      "releaseDate": "2025-10-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "granite-4-0-350m",
        "familyName": "Granite 4.0 350M",
        "variantType": "dense",
        "snapshotLabel": null,
        "baseFamilyModelKey": "granite-4-0-350m",
        "relatedModelKeys": [
          "granite-4-0-h-350m"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "multilingual": 105,
          "instructionFollowing": 131
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 7,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 4.39,
          "tau2Bench": 13.2,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 268
        },
        "coding": {
          "aaCodingIndex": 0.31,
          "terminalBenchHard": 0,
          "aaSciCode": 0.9
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 6.1,
          "aaGpqaDiamond": 26.1,
          "aaHle": 5.7,
          "aaOmniscienceIndex": -72.1,
          "omniscienceAccuracy": 3.2,
          "omniscienceHallucinationRate": 77.8
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 15.9
        },
        "math": {}
      }
    },
    {
      "slug": "granite-4-0-h-350m",
      "canonicalModelKey": "granite-4-0-h-350m",
      "model": "Granite-4.0-H-350M",
      "creator": "IBM",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/granite-4-0-h-350m",
      "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-h-350m.md",
      "id": 172,
      "releaseDate": "2025-10-28",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "granite-4-0-350m",
        "familyName": "Granite 4.0 350M",
        "variantType": "hybrid",
        "snapshotLabel": null,
        "baseFamilyModelKey": "granite-4-0-350m",
        "relatedModelKeys": [
          "granite-4-0-350m"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {
          "multilingual": 106,
          "instructionFollowing": 132
        },
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": true,
          "instructionFollowing": true,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 7,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {
          "aaAgenticIndex": 4.87,
          "tau2Bench": 14.6,
          "gdpvalAaNormalized": 0,
          "gdpvalAa": 289
        },
        "coding": {
          "aaCodingIndex": 0.58,
          "terminalBenchHard": 0,
          "aaSciCode": 1.7
        },
        "reasoning": {
          "lcr": 0,
          "critpt": 0
        },
        "multimodalGrounded": {},
        "knowledge": {
          "artificialAnalysis": 5.44,
          "aaGpqaDiamond": 25.7,
          "aaHle": 6.4,
          "aaOmniscienceIndex": -87.2,
          "omniscienceAccuracy": 3.7,
          "omniscienceHallucinationRate": 94.4
        },
        "multilingual": {},
        "instructionFollowing": {
          "aaIfBench": 17.6
        },
        "math": {}
      }
    },
    {
      "slug": "gpt-5-nano",
      "canonicalModelKey": "gpt-5-nano",
      "model": "GPT-5 nano",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/gpt-5-nano",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-nano.md",
      "id": 194,
      "releaseDate": "2025-08-07",
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "gpt-5",
        "familyName": "GPT-5",
        "variantType": "nano",
        "snapshotLabel": null,
        "baseFamilyModelKey": "gpt-5-high",
        "relatedModelKeys": [
          "gpt-5-high",
          "gpt-5-medium",
          "gpt-5-mini"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 31,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "skt-ax",
      "canonicalModelKey": "skt-ax",
      "model": "A.X series",
      "creator": "SK Telecom",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/skt-ax",
      "markdownUrl": "https://benchlm.ai/md/models/skt-ax.md",
      "id": 212,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "skt-ax",
        "familyName": "A.X",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "skt-ax",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "dna-1-0",
      "canonicalModelKey": "dna-1-0",
      "model": "DNA 1.0 8B",
      "creator": "Community",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/dna-1-0",
      "markdownUrl": "https://benchlm.ai/md/models/dna-1-0.md",
      "id": 215,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "dna-1-0",
        "familyName": "DNA 1.0",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "dna-1-0",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "holotron-12b",
      "canonicalModelKey": "holotron-12b",
      "model": "Holotron-12B",
      "creator": "H Company",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/holotron-12b",
      "markdownUrl": "https://benchlm.ai/md/models/holotron-12b.md",
      "id": 222,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "holotron",
        "familyName": "Holotron",
        "variantType": "12b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "holotron-12b",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "hyperclova-x-dash",
      "canonicalModelKey": "hyperclova-x-dash",
      "model": "HyperClova X Dash",
      "creator": "Naver Cloud",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/hyperclova-x-dash",
      "markdownUrl": "https://benchlm.ai/md/models/hyperclova-x-dash.md",
      "id": 207,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "hyperclova",
        "familyName": "HyperClova X",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "hyperclova-x-think",
        "relatedModelKeys": [
          "hyperclova-x-think",
          "hyperclova-x-seed-8b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "hyperclova-x-think",
      "canonicalModelKey": "hyperclova-x-think",
      "model": "HyperClova X Think 32B",
      "creator": "Naver Cloud",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/hyperclova-x-think",
      "markdownUrl": "https://benchlm.ai/md/models/hyperclova-x-think.md",
      "id": 206,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "hyperclova",
        "familyName": "HyperClova X",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "hyperclova-x-think",
        "relatedModelKeys": [
          "hyperclova-x-dash",
          "hyperclova-x-seed-8b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "kanana-essence",
      "canonicalModelKey": "kanana-essence",
      "model": "Kanana Essence",
      "creator": "Kakao",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/kanana-essence",
      "markdownUrl": "https://benchlm.ai/md/models/kanana-essence.md",
      "id": 210,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "kanana",
        "familyName": "Kanana",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "kanana-flag",
        "relatedModelKeys": [
          "kanana-flag",
          "kanana-nano"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "kanana-flag",
      "canonicalModelKey": "kanana-flag",
      "model": "Kanana Flag",
      "creator": "Kakao",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/kanana-flag",
      "markdownUrl": "https://benchlm.ai/md/models/kanana-flag.md",
      "id": 209,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "kanana",
        "familyName": "Kanana",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "kanana-flag",
        "relatedModelKeys": [
          "kanana-essence",
          "kanana-nano"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "kanana-nano",
      "canonicalModelKey": "kanana-nano",
      "model": "Kanana Nano",
      "creator": "Kakao",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/kanana-nano",
      "markdownUrl": "https://benchlm.ai/md/models/kanana-nano.md",
      "id": 211,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "kanana",
        "familyName": "Kanana",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "kanana-flag",
        "relatedModelKeys": [
          "kanana-flag",
          "kanana-essence"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "orion-mistral-24b",
      "canonicalModelKey": "orion-mistral-24b",
      "model": "OriOn-Mistral-24B",
      "creator": "LightOn",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "344K",
      "contextWindowTokens": 344000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/orion-mistral-24b",
      "markdownUrl": "https://benchlm.ai/md/models/orion-mistral-24b.md",
      "id": 221,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "orion",
        "familyName": "OriOn",
        "variantType": "mistral-24b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "orion-qwen-32b",
        "relatedModelKeys": [
          "orion-qwen-32b"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "orion-qwen-32b",
      "canonicalModelKey": "orion-qwen-32b",
      "model": "OriOn-Qwen-32B",
      "creator": "LightOn",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/orion-qwen-32b",
      "markdownUrl": "https://benchlm.ai/md/models/orion-qwen-32b.md",
      "id": 220,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "orion",
        "familyName": "OriOn",
        "variantType": "qwen-32b",
        "snapshotLabel": null,
        "baseFamilyModelKey": "orion-qwen-32b",
        "relatedModelKeys": [
          "orion-mistral-24b"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "pharia-1-llm-7b-control",
      "canonicalModelKey": "pharia-1-llm-7b-control",
      "model": "Pharia-1-LLM-7B-control",
      "creator": "Aleph Alpha",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "8K",
      "contextWindowTokens": 8000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/pharia-1-llm-7b-control",
      "markdownUrl": "https://benchlm.ai/md/models/pharia-1-llm-7b-control.md",
      "id": 218,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "pharia-1-llm-7b",
        "familyName": "Pharia-1-LLM-7B",
        "variantType": "control",
        "snapshotLabel": null,
        "baseFamilyModelKey": "pharia-1-llm-7b-control",
        "relatedModelKeys": [
          "pharia-1-llm-7b-control-aligned"
        ],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "pharia-1-llm-7b-control-aligned",
      "canonicalModelKey": "pharia-1-llm-7b-control-aligned",
      "model": "Pharia-1-LLM-7B-control-aligned",
      "creator": "Aleph Alpha",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "8K",
      "contextWindowTokens": 8000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/pharia-1-llm-7b-control-aligned",
      "markdownUrl": "https://benchlm.ai/md/models/pharia-1-llm-7b-control-aligned.md",
      "id": 219,
      "releaseDate": null,
      "market": null,
      "isRegional": false,
      "family": {
        "familyKey": "pharia-1-llm-7b",
        "familyName": "Pharia-1-LLM-7B",
        "variantType": "control-aligned",
        "snapshotLabel": null,
        "baseFamilyModelKey": "pharia-1-llm-7b-control",
        "relatedModelKeys": [
          "pharia-1-llm-7b-control"
        ],
        "isCanonicalFamilyEntry": false,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {}
      }
    },
    {
      "slug": "thunder-llm",
      "canonicalModelKey": "thunder-llm",
      "model": "Thunder-LLM 8B",
      "creator": "Academic",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/thunder-llm",
      "markdownUrl": "https://benchlm.ai/md/models/thunder-llm.md",
      "id": 214,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "thunder-llm",
        "familyName": "Thunder-LLM",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "thunder-llm",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    },
    {
      "slug": "varco",
      "canonicalModelKey": "varco",
      "model": "Varco",
      "creator": "NC AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 0,
      "rankingEligible": false,
      "overallRank": null,
      "url": "https://benchlm.ai/models/varco",
      "markdownUrl": "https://benchlm.ai/md/models/varco.md",
      "id": 213,
      "releaseDate": null,
      "market": "Korea",
      "isRegional": true,
      "family": {
        "familyKey": "varco",
        "familyName": "Varco",
        "variantType": null,
        "snapshotLabel": null,
        "baseFamilyModelKey": "varco",
        "relatedModelKeys": [],
        "isCanonicalFamilyEntry": true,
        "supersedesModelKey": null
      },
      "scores": {
        "displayScore": 0,
        "overallScore": 0,
        "rawOverallScore": 0,
        "verifiedDisplayScore": null,
        "displayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        },
        "verifiedDisplayCategoryScores": {
          "agentic": null,
          "coding": null,
          "reasoning": null,
          "multimodalGrounded": null,
          "knowledge": null,
          "multilingual": null,
          "instructionFollowing": null,
          "math": null
        }
      },
      "ranking": {
        "rankingEligible": false,
        "verifiedRankingEligible": false,
        "overallRank": null,
        "categoryRanks": {},
        "categoryRankingEligible": {
          "agentic": false,
          "coding": false,
          "reasoning": false,
          "multimodalGrounded": false,
          "knowledge": false,
          "multilingual": false,
          "instructionFollowing": false,
          "math": false
        }
      },
      "coverage": {
        "trustedBenchmarkCount": 0,
        "verifiedBenchmarkCount": 0,
        "rankableBenchmarkCount": 0,
        "generatedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      "benchmarks": {
        "agentic": {},
        "coding": {},
        "reasoning": {},
        "multimodalGrounded": {},
        "knowledge": {},
        "multilingual": {},
        "instructionFollowing": {},
        "math": {},
        "korean": {}
      }
    }
  ]
}
