{
  "schemaVersion": "1.0",
  "name": "BenchLM leaderboards",
  "description": "Overall and category leaderboards derived from the public BenchLM ranking logic.",
  "canonicalUrl": "https://benchlm.ai/data/leaderboard.json",
  "generatedAt": "2026-06-12T20:35:11.146Z",
  "sourceLastUpdated": "June 12, 2026",
  "sourceFiles": [
    "src/data/benchmarks.json",
    "src/data/provenance.js",
    "src/data/modelReleaseMetadata.js",
    "src/data/scoring.js"
  ],
  "counts": {
    "overall": 123,
    "categories": {
      "agentic": 102,
      "coding": 101,
      "reasoning": 93,
      "multimodalGrounded": 110,
      "knowledge": 107,
      "multilingual": 106,
      "instructionFollowing": 132,
      "math": 87
    }
  },
  "items": [
    {
      "rank": 1,
      "slug": "claude-mythos-5",
      "canonicalModelKey": "claude-mythos-5",
      "model": "Claude Mythos 5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M+",
      "contextWindowTokens": 1000000,
      "displayScore": 99,
      "rankingEligible": true,
      "overallRank": 1,
      "url": "https://benchlm.ai/models/claude-mythos-5",
      "markdownUrl": "https://benchlm.ai/md/models/claude-mythos-5.md",
      "score": 99,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 17,
      "scoreConfidence": 3
    },
    {
      "rank": 2,
      "slug": "claude-fable",
      "canonicalModelKey": "claude-fable-5",
      "model": "Claude Fable 5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M+",
      "contextWindowTokens": 1000000,
      "displayScore": 97,
      "rankingEligible": true,
      "overallRank": 2,
      "url": "https://benchlm.ai/models/claude-fable",
      "markdownUrl": "https://benchlm.ai/md/models/claude-fable.md",
      "score": 97,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 17,
      "scoreConfidence": 3
    },
    {
      "rank": 3,
      "slug": "claude-opus-4-8",
      "canonicalModelKey": "claude-opus-4-8",
      "model": "Claude Opus 4.8",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 93,
      "rankingEligible": true,
      "overallRank": 3,
      "url": "https://benchlm.ai/models/claude-opus-4-8",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-8.md",
      "score": 93,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 23,
      "scoreConfidence": 2
    },
    {
      "rank": 4,
      "slug": "gemini-3-1-pro",
      "canonicalModelKey": "gemini-3-1-pro",
      "model": "Gemini 3.1 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 91,
      "rankingEligible": true,
      "overallRank": 4,
      "url": "https://benchlm.ai/models/gemini-3-1-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
      "score": 91,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 17,
      "scoreConfidence": 3
    },
    {
      "rank": 5,
      "slug": "qwen3-7-max",
      "canonicalModelKey": "qwen3-7-max",
      "model": "Qwen3.7 Max",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 91,
      "rankingEligible": true,
      "overallRank": 5,
      "url": "https://benchlm.ai/models/qwen3-7-max",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-max.md",
      "score": 91,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 33,
      "scoreConfidence": 3
    },
    {
      "rank": 6,
      "slug": "gpt-5-4-pro",
      "canonicalModelKey": "gpt-5-4-pro",
      "model": "GPT-5.4 Pro",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1.05M",
      "contextWindowTokens": 1050000,
      "displayScore": 90,
      "rankingEligible": true,
      "overallRank": 6,
      "url": "https://benchlm.ai/models/gpt-5-4-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4-pro.md",
      "score": 90,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 9,
      "scoreConfidence": 2
    },
    {
      "rank": 7,
      "slug": "gpt-5-5",
      "canonicalModelKey": "gpt-5-5",
      "model": "GPT-5.5",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 89,
      "rankingEligible": true,
      "overallRank": 7,
      "url": "https://benchlm.ai/models/gpt-5-5",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-5.md",
      "score": 89,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 22,
      "scoreConfidence": 3
    },
    {
      "rank": 8,
      "slug": "gemini-3-pro-deep-think",
      "canonicalModelKey": "gemini-3-pro-deep-think",
      "model": "Gemini 3 Pro Deep Think",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 89,
      "rankingEligible": true,
      "overallRank": 8,
      "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
      "score": 89,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 9,
      "slug": "grok-4-1",
      "canonicalModelKey": "grok-4-1",
      "model": "Grok 4.1",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 89,
      "rankingEligible": true,
      "overallRank": 9,
      "url": "https://benchlm.ai/models/grok-4-1",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
      "score": 89,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 10,
      "slug": "gpt-5-4",
      "canonicalModelKey": "gpt-5-4",
      "model": "GPT-5.4",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1.05M",
      "contextWindowTokens": 1050000,
      "displayScore": 88,
      "rankingEligible": true,
      "overallRank": 10,
      "url": "https://benchlm.ai/models/gpt-5-4",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
      "score": 88,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 26,
      "scoreConfidence": 4
    },
    {
      "rank": 11,
      "slug": "qwen3-7-plus",
      "canonicalModelKey": "qwen3-7-plus",
      "model": "Qwen3.7 Plus",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 88,
      "rankingEligible": true,
      "overallRank": 11,
      "url": "https://benchlm.ai/models/qwen3-7-plus",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-plus.md",
      "score": 88,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 50,
      "scoreConfidence": 4
    },
    {
      "rank": 12,
      "slug": "claude-opus-4-6",
      "canonicalModelKey": "claude-opus-4-6",
      "model": "Claude Opus 4.6",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 86,
      "rankingEligible": true,
      "overallRank": 12,
      "url": "https://benchlm.ai/models/claude-opus-4-6",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
      "score": 86,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 27,
      "scoreConfidence": 4
    },
    {
      "rank": 13,
      "slug": "gemini-3-5-flash",
      "canonicalModelKey": "gemini-3-5-flash",
      "model": "Gemini 3.5 Flash",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 86,
      "rankingEligible": true,
      "overallRank": 13,
      "url": "https://benchlm.ai/models/gemini-3-5-flash",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-5-flash.md",
      "score": 86,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 20,
      "scoreConfidence": 3
    },
    {
      "rank": 14,
      "slug": "deepseek-v4-pro-max",
      "canonicalModelKey": "deepseek-v4-pro-max",
      "model": "DeepSeek V4 Pro (Max)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 86,
      "rankingEligible": true,
      "overallRank": 14,
      "url": "https://benchlm.ai/models/deepseek-v4-pro-max",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-max.md",
      "score": 86,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 24,
      "scoreConfidence": 2
    },
    {
      "rank": 15,
      "slug": "gpt-5-3-codex",
      "canonicalModelKey": "gpt-5-3-codex",
      "model": "GPT-5.3 Codex",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 85,
      "rankingEligible": true,
      "overallRank": 15,
      "url": "https://benchlm.ai/models/gpt-5-3-codex",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
      "score": 85,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 6,
      "scoreConfidence": 1
    },
    {
      "rank": 16,
      "slug": "claude-opus-4-7-adaptive",
      "canonicalModelKey": "claude-opus-4-7-max",
      "model": "Claude Opus 4.7 (Adaptive)",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 84,
      "rankingEligible": true,
      "overallRank": 16,
      "url": "https://benchlm.ai/models/claude-opus-4-7-adaptive",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-7-adaptive.md",
      "score": 84,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 18,
      "scoreConfidence": 3
    },
    {
      "rank": 17,
      "slug": "glm-5-1",
      "canonicalModelKey": "glm-5-1",
      "model": "GLM-5.1",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "203K",
      "contextWindowTokens": 203000,
      "displayScore": 82,
      "rankingEligible": true,
      "overallRank": 17,
      "url": "https://benchlm.ai/models/glm-5-1",
      "markdownUrl": "https://benchlm.ai/md/models/glm-5-1.md",
      "score": 82,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 16,
      "scoreConfidence": 3
    },
    {
      "rank": 18,
      "slug": "claude-sonnet-4-6",
      "canonicalModelKey": "claude-sonnet-4-6",
      "model": "Claude Sonnet 4.6",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 82,
      "rankingEligible": true,
      "overallRank": 18,
      "url": "https://benchlm.ai/models/claude-sonnet-4-6",
      "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
      "score": 82,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 14,
      "scoreConfidence": 3
    },
    {
      "rank": 19,
      "slug": "deepseek-v4-pro-high",
      "canonicalModelKey": "deepseek-v4-pro-high",
      "model": "DeepSeek V4 Pro (High)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 82,
      "rankingEligible": true,
      "overallRank": 19,
      "url": "https://benchlm.ai/models/deepseek-v4-pro-high",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-high.md",
      "score": 82,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 23,
      "scoreConfidence": 2
    },
    {
      "rank": 20,
      "slug": "o1-preview",
      "canonicalModelKey": "o1-preview",
      "model": "o1-preview",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 82,
      "rankingEligible": true,
      "overallRank": 20,
      "url": "https://benchlm.ai/models/o1-preview",
      "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
      "score": 82,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 21,
      "slug": "kimi-2-6",
      "canonicalModelKey": "kimi-2-6",
      "model": "Kimi K2.6",
      "creator": "Moonshot AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 81,
      "rankingEligible": true,
      "overallRank": 21,
      "url": "https://benchlm.ai/models/kimi-2-6",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-2-6.md",
      "score": 81,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 28,
      "scoreConfidence": 2
    },
    {
      "rank": 22,
      "slug": "gemini-3-pro",
      "canonicalModelKey": "gemini-3-pro",
      "model": "Gemini 3 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 80,
      "rankingEligible": true,
      "overallRank": 22,
      "url": "https://benchlm.ai/models/gemini-3-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
      "score": 80,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 8,
      "scoreConfidence": 2
    },
    {
      "rank": 23,
      "slug": "minimax-m3",
      "canonicalModelKey": "minimax-m3",
      "model": "MiniMax M3",
      "creator": "MiniMax",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 79,
      "rankingEligible": true,
      "overallRank": 23,
      "url": "https://benchlm.ai/models/minimax-m3",
      "markdownUrl": "https://benchlm.ai/md/models/minimax-m3.md",
      "score": 79,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 15,
      "scoreConfidence": 2
    },
    {
      "rank": 24,
      "slug": "glm-5-reasoning",
      "canonicalModelKey": "glm-5-reasoning",
      "model": "GLM-5 (Reasoning)",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 79,
      "rankingEligible": true,
      "overallRank": 24,
      "url": "https://benchlm.ai/models/glm-5-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
      "score": 79,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 25,
      "slug": "gpt-5-2",
      "canonicalModelKey": "gpt-5-2",
      "model": "GPT-5.2",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 78,
      "rankingEligible": true,
      "overallRank": 25,
      "url": "https://benchlm.ai/models/gpt-5-2",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
      "score": 78,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 11,
      "scoreConfidence": 2
    },
    {
      "rank": 26,
      "slug": "qwen3-5-397b-reasoning",
      "canonicalModelKey": "qwen3-5-397b-reasoning",
      "model": "Qwen3.5 397B (Reasoning)",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 77,
      "rankingEligible": true,
      "overallRank": 26,
      "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
      "score": 77,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 27,
      "slug": "gpt-5-1",
      "canonicalModelKey": "gpt-5-1",
      "model": "GPT-5.1",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 77,
      "rankingEligible": true,
      "overallRank": 27,
      "url": "https://benchlm.ai/models/gpt-5-1",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
      "score": 77,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 28,
      "slug": "claude-opus-4-5",
      "canonicalModelKey": "claude-opus-4-5",
      "model": "Claude Opus 4.5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 76,
      "rankingEligible": true,
      "overallRank": 28,
      "url": "https://benchlm.ai/models/claude-opus-4-5",
      "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
      "score": 76,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 41,
      "scoreConfidence": 4
    },
    {
      "rank": 29,
      "slug": "gpt-5-high",
      "canonicalModelKey": "gpt-5-high",
      "model": "GPT-5 (high)",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 76,
      "rankingEligible": true,
      "overallRank": 29,
      "url": "https://benchlm.ai/models/gpt-5-high",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
      "score": 76,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 30,
      "slug": "gpt-5-2-codex",
      "canonicalModelKey": "gpt-5-2-codex",
      "model": "GPT-5.2-Codex",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 76,
      "rankingEligible": true,
      "overallRank": 30,
      "url": "https://benchlm.ai/models/gpt-5-2-codex",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
      "score": 76,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 31,
      "slug": "kimi-k2-5-reasoning",
      "canonicalModelKey": "kimi-k2-5-reasoning",
      "model": "Kimi K2.5 (Reasoning)",
      "creator": "Moonshot AI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 75,
      "rankingEligible": true,
      "overallRank": 31,
      "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
      "score": 75,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 8,
      "scoreConfidence": 2
    },
    {
      "rank": 32,
      "slug": "gpt-5-1-codex-max",
      "canonicalModelKey": "gpt-5-1-codex-max",
      "model": "GPT-5.1-Codex-Max",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "400K",
      "contextWindowTokens": 400000,
      "displayScore": 75,
      "rankingEligible": true,
      "overallRank": 32,
      "url": "https://benchlm.ai/models/gpt-5-1-codex-max",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex-max.md",
      "score": 75,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 33,
      "slug": "deepseek-v4-flash-max",
      "canonicalModelKey": "deepseek-v4-flash-max",
      "model": "DeepSeek V4 Flash (Max)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 74,
      "rankingEligible": true,
      "overallRank": 33,
      "url": "https://benchlm.ai/models/deepseek-v4-flash-max",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-max.md",
      "score": 74,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 23,
      "scoreConfidence": 2
    },
    {
      "rank": 34,
      "slug": "qwen3-6-27b",
      "canonicalModelKey": "qwen3-6-27b",
      "model": "Qwen3.6-27B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 72,
      "rankingEligible": true,
      "overallRank": 34,
      "url": "https://benchlm.ai/models/qwen3-6-27b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-27b.md",
      "score": 72,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 37,
      "scoreConfidence": 2
    },
    {
      "rank": 35,
      "slug": "grok-4-20-beta",
      "canonicalModelKey": "grok-4-20-beta",
      "model": "Grok 4.20",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 71,
      "rankingEligible": true,
      "overallRank": 35,
      "url": "https://benchlm.ai/models/grok-4-20-beta",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-20-beta.md",
      "score": 71,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 16,
      "scoreConfidence": 3
    },
    {
      "rank": 36,
      "slug": "deepseek-v4-flash-high",
      "canonicalModelKey": "deepseek-v4-flash-high",
      "model": "DeepSeek V4 Flash (High)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 71,
      "rankingEligible": true,
      "overallRank": 36,
      "url": "https://benchlm.ai/models/deepseek-v4-flash-high",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-high.md",
      "score": 71,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 23,
      "scoreConfidence": 2
    },
    {
      "rank": 37,
      "slug": "gpt-5-medium",
      "canonicalModelKey": "gpt-5-medium",
      "model": "GPT-5 (medium)",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 70,
      "rankingEligible": true,
      "overallRank": 37,
      "url": "https://benchlm.ai/models/gpt-5-medium",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
      "score": 70,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 38,
      "slug": "nemotron-3-ultra",
      "canonicalModelKey": "nemotron-3-ultra-500b",
      "model": "Nemotron 3 Ultra",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 68,
      "rankingEligible": true,
      "overallRank": 38,
      "url": "https://benchlm.ai/models/nemotron-3-ultra",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-ultra.md",
      "score": 68,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 18,
      "scoreConfidence": 3
    },
    {
      "rank": 39,
      "slug": "deepseek-v4-pro",
      "canonicalModelKey": "deepseek-v4-pro",
      "model": "DeepSeek V4 Pro",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 68,
      "rankingEligible": true,
      "overallRank": 39,
      "url": "https://benchlm.ai/models/deepseek-v4-pro",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro.md",
      "score": 68,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 21,
      "scoreConfidence": 2
    },
    {
      "rank": 40,
      "slug": "glm-4-7",
      "canonicalModelKey": "glm-4-7",
      "model": "GLM-4.7",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 68,
      "rankingEligible": true,
      "overallRank": 40,
      "url": "https://benchlm.ai/models/glm-4-7",
      "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
      "score": 68,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 7,
      "scoreConfidence": 1
    },
    {
      "rank": 41,
      "slug": "grok-4-1-fast",
      "canonicalModelKey": "grok-4-1-fast",
      "model": "Grok 4.1 Fast",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 68,
      "rankingEligible": true,
      "overallRank": 41,
      "url": "https://benchlm.ai/models/grok-4-1-fast",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
      "score": 68,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 42,
      "slug": "glm-5",
      "canonicalModelKey": "glm-5",
      "model": "GLM-5",
      "creator": "Z.AI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 67,
      "rankingEligible": true,
      "overallRank": 42,
      "url": "https://benchlm.ai/models/glm-5",
      "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
      "score": 67,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 33,
      "scoreConfidence": 4
    },
    {
      "rank": 43,
      "slug": "qwen3-6-plus",
      "canonicalModelKey": "qwen3-6-plus",
      "model": "Qwen3.6 Plus",
      "creator": "Alibaba",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 66,
      "rankingEligible": true,
      "overallRank": 43,
      "url": "https://benchlm.ai/models/qwen3-6-plus",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-plus.md",
      "score": 66,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 38,
      "scoreConfidence": 4
    },
    {
      "rank": 44,
      "slug": "mai-thinking-1",
      "canonicalModelKey": "mai-thinking-1",
      "model": "MAI-Thinking-1",
      "creator": "Microsoft",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 65,
      "rankingEligible": true,
      "overallRank": 44,
      "url": "https://benchlm.ai/models/mai-thinking-1",
      "markdownUrl": "https://benchlm.ai/md/models/mai-thinking-1.md",
      "score": 65,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 14,
      "scoreConfidence": 3
    },
    {
      "rank": 45,
      "slug": "qwen3-6-35b-a3b",
      "canonicalModelKey": "qwen3-6-35b-a3b",
      "model": "Qwen3.6-35B-A3B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 65,
      "rankingEligible": true,
      "overallRank": 45,
      "url": "https://benchlm.ai/models/qwen3-6-35b-a3b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-35b-a3b.md",
      "score": 65,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 40,
      "scoreConfidence": 2
    },
    {
      "rank": 46,
      "slug": "claude-sonnet-4-5",
      "canonicalModelKey": "claude-sonnet-4-5",
      "model": "Claude Sonnet 4.5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 64,
      "rankingEligible": true,
      "overallRank": 46,
      "url": "https://benchlm.ai/models/claude-sonnet-4-5",
      "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
      "score": 64,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 6,
      "scoreConfidence": 1
    },
    {
      "rank": 47,
      "slug": "kimi-k2-5",
      "canonicalModelKey": "kimi-k2-5",
      "model": "Kimi K2.5",
      "creator": "Moonshot AI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 63,
      "rankingEligible": true,
      "overallRank": 47,
      "url": "https://benchlm.ai/models/kimi-k2-5",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
      "score": 63,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 39,
      "scoreConfidence": 4
    },
    {
      "rank": 48,
      "slug": "qwen3-5-122b-a10b",
      "canonicalModelKey": "qwen3-5-122b-a10b",
      "model": "Qwen3.5-122B-A10B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 63,
      "rankingEligible": true,
      "overallRank": 48,
      "url": "https://benchlm.ai/models/qwen3-5-122b-a10b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-122b-a10b.md",
      "score": 63,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 13,
      "scoreConfidence": 3
    },
    {
      "rank": 49,
      "slug": "gemini-2-5-pro",
      "canonicalModelKey": "gemini-2-5-pro",
      "model": "Gemini 2.5 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 63,
      "rankingEligible": true,
      "overallRank": 49,
      "url": "https://benchlm.ai/models/gemini-2-5-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
      "score": 63,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 3,
      "scoreConfidence": 1
    },
    {
      "rank": 50,
      "slug": "grok-4",
      "canonicalModelKey": "grok-4",
      "model": "Grok 4",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 63,
      "rankingEligible": true,
      "overallRank": 50,
      "url": "https://benchlm.ai/models/grok-4",
      "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
      "score": 63,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 51,
      "slug": "qwen3-5-397b",
      "canonicalModelKey": "qwen3-5-397b",
      "model": "Qwen3.5 397B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 62,
      "rankingEligible": true,
      "overallRank": 51,
      "url": "https://benchlm.ai/models/qwen3-5-397b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
      "score": 62,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 36,
      "scoreConfidence": 4
    },
    {
      "rank": 52,
      "slug": "qwen3-5-27b",
      "canonicalModelKey": "qwen3-5-27b",
      "model": "Qwen3.5-27B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 61,
      "rankingEligible": true,
      "overallRank": 52,
      "url": "https://benchlm.ai/models/qwen3-5-27b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-27b.md",
      "score": 61,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 13,
      "scoreConfidence": 3
    },
    {
      "rank": 53,
      "slug": "deepseek-v3-2-thinking",
      "canonicalModelKey": "deepseek-v3-2-thinking",
      "model": "DeepSeek V3.2 (Thinking)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 60,
      "rankingEligible": true,
      "overallRank": 53,
      "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
      "score": 60,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 54,
      "slug": "mimo-v2-flash",
      "canonicalModelKey": "mimo-v2-flash",
      "model": "MiMo-V2-Flash",
      "creator": "Xiaomi",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 59,
      "rankingEligible": true,
      "overallRank": 54,
      "url": "https://benchlm.ai/models/mimo-v2-flash",
      "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
      "score": 59,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 55,
      "slug": "deepseek-v4-flash",
      "canonicalModelKey": "deepseek-v4-flash",
      "model": "DeepSeek V4 Flash",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 57,
      "rankingEligible": true,
      "overallRank": 55,
      "url": "https://benchlm.ai/models/deepseek-v4-flash",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash.md",
      "score": 57,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 21,
      "scoreConfidence": 2
    },
    {
      "rank": 56,
      "slug": "gpt-4-1",
      "canonicalModelKey": "gpt-4-1",
      "model": "GPT-4.1",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 57,
      "rankingEligible": true,
      "overallRank": 56,
      "url": "https://benchlm.ai/models/gpt-4-1",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1.md",
      "score": 57,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 4,
      "scoreConfidence": 1
    },
    {
      "rank": 57,
      "slug": "o3-pro",
      "canonicalModelKey": "o3-pro",
      "model": "o3-pro",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 57,
      "rankingEligible": true,
      "overallRank": 57,
      "url": "https://benchlm.ai/models/o3-pro",
      "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
      "score": 57,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 58,
      "slug": "o1",
      "canonicalModelKey": "o1",
      "model": "o1",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 57,
      "rankingEligible": true,
      "overallRank": 58,
      "url": "https://benchlm.ai/models/o1",
      "markdownUrl": "https://benchlm.ai/md/models/o1.md",
      "score": 57,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 3,
      "scoreConfidence": 1
    },
    {
      "rank": 59,
      "slug": "deepseek-v3-2",
      "canonicalModelKey": "deepseek-v3-2",
      "model": "DeepSeek V3.2",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 56,
      "rankingEligible": true,
      "overallRank": 59,
      "url": "https://benchlm.ai/models/deepseek-v3-2",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
      "score": 56,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 4,
      "scoreConfidence": 1
    },
    {
      "rank": 60,
      "slug": "claude-haiku-4-5",
      "canonicalModelKey": "claude-haiku-4-5",
      "model": "Claude Haiku 4.5",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 56,
      "rankingEligible": true,
      "overallRank": 60,
      "url": "https://benchlm.ai/models/claude-haiku-4-5",
      "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
      "score": 56,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 61,
      "slug": "o3",
      "canonicalModelKey": "o3",
      "model": "o3",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 56,
      "rankingEligible": true,
      "overallRank": 61,
      "url": "https://benchlm.ai/models/o3",
      "markdownUrl": "https://benchlm.ai/md/models/o3.md",
      "score": 56,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 62,
      "slug": "qwen3-5-35b-a3b",
      "canonicalModelKey": "qwen3-5-35b-a3b",
      "model": "Qwen3.5-35B-A3B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "262K",
      "contextWindowTokens": 262000,
      "displayScore": 55,
      "rankingEligible": true,
      "overallRank": 62,
      "url": "https://benchlm.ai/models/qwen3-5-35b-a3b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-35b-a3b.md",
      "score": 55,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 13,
      "scoreConfidence": 3
    },
    {
      "rank": 63,
      "slug": "gemini-3-flash",
      "canonicalModelKey": "gemini-3-flash",
      "model": "Gemini 3 Flash",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 55,
      "rankingEligible": true,
      "overallRank": 63,
      "url": "https://benchlm.ai/models/gemini-3-flash",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
      "score": 55,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 2,
      "scoreConfidence": 1
    },
    {
      "rank": 64,
      "slug": "o3-mini",
      "canonicalModelKey": "o3-mini",
      "model": "o3-mini",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 55,
      "rankingEligible": true,
      "overallRank": 64,
      "url": "https://benchlm.ai/models/o3-mini",
      "markdownUrl": "https://benchlm.ai/md/models/o3-mini.md",
      "score": 55,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 5,
      "scoreConfidence": 1
    },
    {
      "rank": 65,
      "slug": "minimax-m2-7",
      "canonicalModelKey": "minimax-m2-7",
      "model": "MiniMax M2.7",
      "creator": "MiniMax",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 53,
      "rankingEligible": true,
      "overallRank": 65,
      "url": "https://benchlm.ai/models/minimax-m2-7",
      "markdownUrl": "https://benchlm.ai/md/models/minimax-m2-7.md",
      "score": 53,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 17,
      "scoreConfidence": 2
    },
    {
      "rank": 66,
      "slug": "deepseek-coder-2-0",
      "canonicalModelKey": "deepseek-coder-2-0",
      "model": "DeepSeek Coder 2.0",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 51,
      "rankingEligible": true,
      "overallRank": 66,
      "url": "https://benchlm.ai/models/deepseek-coder-2-0",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
      "score": 51,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 67,
      "slug": "claude-4-1-opus",
      "canonicalModelKey": "claude-4-1-opus",
      "model": "Claude 4.1 Opus",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 51,
      "rankingEligible": true,
      "overallRank": 67,
      "url": "https://benchlm.ai/models/claude-4-1-opus",
      "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
      "score": 51,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 68,
      "slug": "deepseek-llm-2-0",
      "canonicalModelKey": "deepseek-llm-2-0",
      "model": "DeepSeek LLM 2.0",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 50,
      "rankingEligible": true,
      "overallRank": 68,
      "url": "https://benchlm.ai/models/deepseek-llm-2-0",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
      "score": 50,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 69,
      "slug": "qwen2-5-1m",
      "canonicalModelKey": "qwen2-5-1m",
      "model": "Qwen2.5-1M",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 50,
      "rankingEligible": true,
      "overallRank": 69,
      "url": "https://benchlm.ai/models/qwen2-5-1m",
      "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
      "score": 50,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 70,
      "slug": "claude-4-sonnet",
      "canonicalModelKey": "claude-4-sonnet",
      "model": "Claude 4 Sonnet",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 50,
      "rankingEligible": true,
      "overallRank": 70,
      "url": "https://benchlm.ai/models/claude-4-sonnet",
      "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
      "score": 50,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 71,
      "slug": "gpt-4o-mini",
      "canonicalModelKey": "gpt-4o-mini",
      "model": "GPT-4o mini",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 49,
      "rankingEligible": true,
      "overallRank": 71,
      "url": "https://benchlm.ai/models/gpt-4o-mini",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4o-mini.md",
      "score": 49,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 72,
      "slug": "qwen2-5-72b",
      "canonicalModelKey": "qwen2-5-72b",
      "model": "Qwen2.5-72B",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 49,
      "rankingEligible": true,
      "overallRank": 72,
      "url": "https://benchlm.ai/models/qwen2-5-72b",
      "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
      "score": 49,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 73,
      "slug": "deepseekmath-v2",
      "canonicalModelKey": "deepseekmath-v2",
      "model": "DeepSeekMath V2",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 49,
      "rankingEligible": true,
      "overallRank": 73,
      "url": "https://benchlm.ai/models/deepseekmath-v2",
      "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
      "score": 49,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 74,
      "slug": "mistral-large-3",
      "canonicalModelKey": "mistral-large-3",
      "model": "Mistral Large 3",
      "creator": "Mistral",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 48,
      "rankingEligible": true,
      "overallRank": 74,
      "url": "https://benchlm.ai/models/mistral-large-3",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
      "score": 48,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 75,
      "slug": "gemini-3-1-flash-lite",
      "canonicalModelKey": "gemini-3-1-flash-lite",
      "model": "Gemini 3.1 Flash-Lite",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 47,
      "rankingEligible": true,
      "overallRank": 75,
      "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
      "score": 47,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 2,
      "scoreConfidence": 1
    },
    {
      "rank": 76,
      "slug": "qwen3-235b-2507-reasoning",
      "canonicalModelKey": "qwen3-235b-2507-reasoning",
      "model": "Qwen3 235B 2507 (Reasoning)",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 45,
      "rankingEligible": true,
      "overallRank": 76,
      "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
      "score": 45,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 77,
      "slug": "gpt-4-1-mini",
      "canonicalModelKey": "gpt-4-1-mini",
      "model": "GPT-4.1 mini",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 45,
      "rankingEligible": true,
      "overallRank": 77,
      "url": "https://benchlm.ai/models/gpt-4-1-mini",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-mini.md",
      "score": 45,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 4,
      "scoreConfidence": 1
    },
    {
      "rank": 78,
      "slug": "nemotron-3-super-100b",
      "canonicalModelKey": "nemotron-3-super-100b",
      "model": "Nemotron 3 Super 100B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 43,
      "rankingEligible": true,
      "overallRank": 78,
      "url": "https://benchlm.ai/models/nemotron-3-super-100b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
      "score": 43,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 79,
      "slug": "o4-mini-high",
      "canonicalModelKey": "o4-mini-high",
      "model": "o4-mini (high)",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 43,
      "rankingEligible": true,
      "overallRank": 79,
      "url": "https://benchlm.ai/models/o4-mini-high",
      "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
      "score": 43,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 80,
      "slug": "claude-4-1-opus-thinking",
      "canonicalModelKey": "claude-4-1-opus-thinking",
      "model": "Claude 4.1 Opus Thinking",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 43,
      "rankingEligible": true,
      "overallRank": 80,
      "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
      "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
      "score": 43,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 81,
      "slug": "gpt-4o",
      "canonicalModelKey": "gpt-4o",
      "model": "GPT-4o",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 42,
      "rankingEligible": true,
      "overallRank": 81,
      "url": "https://benchlm.ai/models/gpt-4o",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
      "score": 42,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 82,
      "slug": "kimi-k2",
      "canonicalModelKey": "kimi-k2",
      "model": "Kimi K2",
      "creator": "Moonshot AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 41,
      "rankingEligible": true,
      "overallRank": 82,
      "url": "https://benchlm.ai/models/kimi-k2",
      "markdownUrl": "https://benchlm.ai/md/models/kimi-k2.md",
      "score": 41,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 83,
      "slug": "llama-3-1-405b",
      "canonicalModelKey": "llama-3-1-405b",
      "model": "Llama 3.1 405B",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 40,
      "rankingEligible": true,
      "overallRank": 83,
      "url": "https://benchlm.ai/models/llama-3-1-405b",
      "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
      "score": 40,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 84,
      "slug": "claude-3-5-sonnet",
      "canonicalModelKey": "claude-3-5-sonnet",
      "model": "Claude 3.5 Sonnet",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 40,
      "rankingEligible": true,
      "overallRank": 84,
      "url": "https://benchlm.ai/models/claude-3-5-sonnet",
      "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
      "score": 40,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 85,
      "slug": "grok-code-fast-1",
      "canonicalModelKey": "grok-code-fast-1",
      "model": "Grok Code Fast 1",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "256K",
      "contextWindowTokens": 256000,
      "displayScore": 39,
      "rankingEligible": true,
      "overallRank": 85,
      "url": "https://benchlm.ai/models/grok-code-fast-1",
      "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
      "score": 39,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 86,
      "slug": "sarvam-105b",
      "canonicalModelKey": "sarvam-105b",
      "model": "Sarvam 105B",
      "creator": "Sarvam",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 39,
      "rankingEligible": true,
      "overallRank": 86,
      "url": "https://benchlm.ai/models/sarvam-105b",
      "markdownUrl": "https://benchlm.ai/md/models/sarvam-105b.md",
      "score": 39,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 87,
      "slug": "mistral-large-2",
      "canonicalModelKey": "mistral-large-2",
      "model": "Mistral Large 2",
      "creator": "Mistral",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 38,
      "rankingEligible": true,
      "overallRank": 87,
      "url": "https://benchlm.ai/models/mistral-large-2",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
      "score": 38,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 88,
      "slug": "gemini-2-5-flash",
      "canonicalModelKey": "gemini-2-5-flash",
      "model": "Gemini 2.5 Flash",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 37,
      "rankingEligible": true,
      "overallRank": 88,
      "url": "https://benchlm.ai/models/gemini-2-5-flash",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
      "score": 37,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 89,
      "slug": "gemini-1-5-pro",
      "canonicalModelKey": "gemini-1-5-pro",
      "model": "Gemini 1.5 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "2M",
      "contextWindowTokens": 2000000,
      "displayScore": 35,
      "rankingEligible": true,
      "overallRank": 89,
      "url": "https://benchlm.ai/models/gemini-1-5-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
      "score": 35,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 90,
      "slug": "deepseek-v3",
      "canonicalModelKey": "deepseek-v3",
      "model": "DeepSeek V3",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 34,
      "rankingEligible": true,
      "overallRank": 90,
      "url": "https://benchlm.ai/models/deepseek-v3",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3.md",
      "score": 34,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 91,
      "slug": "gpt-oss-120b",
      "canonicalModelKey": "gpt-oss-120b",
      "model": "GPT-OSS 120B",
      "creator": "OpenAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 34,
      "rankingEligible": true,
      "overallRank": 91,
      "url": "https://benchlm.ai/models/gpt-oss-120b",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
      "score": 34,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 92,
      "slug": "claude-3-opus",
      "canonicalModelKey": "claude-3-opus",
      "model": "Claude 3 Opus",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 34,
      "rankingEligible": true,
      "overallRank": 92,
      "url": "https://benchlm.ai/models/claude-3-opus",
      "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
      "score": 34,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 93,
      "slug": "minicpm5-1b",
      "canonicalModelKey": "minicpm5-1b",
      "model": "MiniCPM5-1B",
      "creator": "OpenBMB",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "131K",
      "contextWindowTokens": 131000,
      "displayScore": 34,
      "rankingEligible": true,
      "overallRank": 93,
      "url": "https://benchlm.ai/models/minicpm5-1b",
      "markdownUrl": "https://benchlm.ai/md/models/minicpm5-1b.md",
      "score": 34,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 14,
      "scoreConfidence": 1
    },
    {
      "rank": 94,
      "slug": "deepseek-r1",
      "canonicalModelKey": "deepseek-r1",
      "model": "DeepSeek-R1",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 32,
      "rankingEligible": true,
      "overallRank": 94,
      "url": "https://benchlm.ai/models/deepseek-r1",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
      "score": 32,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 95,
      "slug": "qwen3-235b-2507",
      "canonicalModelKey": "qwen3-235b-2507",
      "model": "Qwen3 235B 2507",
      "creator": "Alibaba",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 32,
      "rankingEligible": true,
      "overallRank": 95,
      "url": "https://benchlm.ai/models/qwen3-235b-2507",
      "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
      "score": 32,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 4,
      "scoreConfidence": 1
    },
    {
      "rank": 96,
      "slug": "dbrx-instruct",
      "canonicalModelKey": "dbrx-instruct",
      "model": "DBRX Instruct",
      "creator": "Databricks",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 32,
      "rankingEligible": true,
      "overallRank": 96,
      "url": "https://benchlm.ai/models/dbrx-instruct",
      "markdownUrl": "https://benchlm.ai/md/models/dbrx-instruct.md",
      "score": 32,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 97,
      "slug": "grok-3-beta",
      "canonicalModelKey": "grok-3-beta",
      "model": "Grok 3 [Beta]",
      "creator": "xAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 30,
      "rankingEligible": true,
      "overallRank": 97,
      "url": "https://benchlm.ai/models/grok-3-beta",
      "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
      "score": 30,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 98,
      "slug": "deepseek-v3-1-reasoning",
      "canonicalModelKey": "deepseek-v3-1-reasoning",
      "model": "DeepSeek V3.1 (Reasoning)",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 29,
      "rankingEligible": true,
      "overallRank": 98,
      "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
      "score": 29,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 99,
      "slug": "o1-pro",
      "canonicalModelKey": "o1-pro",
      "model": "o1-pro",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 28,
      "rankingEligible": true,
      "overallRank": 99,
      "url": "https://benchlm.ai/models/o1-pro",
      "markdownUrl": "https://benchlm.ai/md/models/o1-pro.md",
      "score": 28,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 100,
      "slug": "phi-4",
      "canonicalModelKey": "phi-4",
      "model": "Phi-4",
      "creator": "Microsoft",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "16K",
      "contextWindowTokens": 16000,
      "displayScore": 27,
      "rankingEligible": true,
      "overallRank": 100,
      "url": "https://benchlm.ai/models/phi-4",
      "markdownUrl": "https://benchlm.ai/md/models/phi-4.md",
      "score": 27,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 101,
      "slug": "gpt-4-1-nano",
      "canonicalModelKey": "gpt-4-1-nano",
      "model": "GPT-4.1 nano",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 27,
      "rankingEligible": true,
      "overallRank": 101,
      "url": "https://benchlm.ai/models/gpt-4-1-nano",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-nano.md",
      "score": 27,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 3,
      "scoreConfidence": 1
    },
    {
      "rank": 102,
      "slug": "glm-4-5",
      "canonicalModelKey": "glm-4-5",
      "model": "GLM-4.5",
      "creator": "Z.AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 25,
      "rankingEligible": true,
      "overallRank": 102,
      "url": "https://benchlm.ai/models/glm-4-5",
      "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
      "score": 25,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 103,
      "slug": "llama-4-scout",
      "canonicalModelKey": "llama-4-scout",
      "model": "Llama 4 Scout",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "10M",
      "contextWindowTokens": 10000000,
      "displayScore": 25,
      "rankingEligible": true,
      "overallRank": 103,
      "url": "https://benchlm.ai/models/llama-4-scout",
      "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
      "score": 25,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 104,
      "slug": "nemotron-3-nano-30b",
      "canonicalModelKey": "nemotron-3-nano-30b",
      "model": "Nemotron 3 Nano 30B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 25,
      "rankingEligible": true,
      "overallRank": 104,
      "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
      "score": 25,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 105,
      "slug": "llama-3-70b",
      "canonicalModelKey": "llama-3-70b",
      "model": "Llama 3 70B",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 25,
      "rankingEligible": true,
      "overallRank": 105,
      "url": "https://benchlm.ai/models/llama-3-70b",
      "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
      "score": 25,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 106,
      "slug": "deepseek-v3-1",
      "canonicalModelKey": "deepseek-v3-1",
      "model": "DeepSeek V3.1",
      "creator": "DeepSeek",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 24,
      "rankingEligible": true,
      "overallRank": 106,
      "url": "https://benchlm.ai/models/deepseek-v3-1",
      "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
      "score": 24,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 107,
      "slug": "gpt-4-turbo",
      "canonicalModelKey": "gpt-4-turbo",
      "model": "GPT-4 Turbo",
      "creator": "OpenAI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 24,
      "rankingEligible": true,
      "overallRank": 107,
      "url": "https://benchlm.ai/models/gpt-4-turbo",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
      "score": 24,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 108,
      "slug": "gemini-1-0-pro",
      "canonicalModelKey": "gemini-1-0-pro",
      "model": "Gemini 1.0 Pro",
      "creator": "Google",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 24,
      "rankingEligible": true,
      "overallRank": 108,
      "url": "https://benchlm.ai/models/gemini-1-0-pro",
      "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
      "score": 24,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 109,
      "slug": "z-1",
      "canonicalModelKey": "z-1",
      "model": "Z-1",
      "creator": "Z",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 23,
      "rankingEligible": true,
      "overallRank": 109,
      "url": "https://benchlm.ai/models/z-1",
      "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
      "score": 23,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 110,
      "slug": "mistral-8x7b",
      "canonicalModelKey": "mistral-8x7b",
      "model": "Mistral 8x7B",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 23,
      "rankingEligible": true,
      "overallRank": 110,
      "url": "https://benchlm.ai/models/mistral-8x7b",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
      "score": 23,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 111,
      "slug": "claude-3-haiku",
      "canonicalModelKey": "claude-3-haiku",
      "model": "Claude 3 Haiku",
      "creator": "Anthropic",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "200K",
      "contextWindowTokens": 200000,
      "displayScore": 23,
      "rankingEligible": true,
      "overallRank": 111,
      "url": "https://benchlm.ai/models/claude-3-haiku",
      "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
      "score": 23,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 112,
      "slug": "mixtral-8x22b-instruct-v0-1",
      "canonicalModelKey": "mixtral-8x22b-instruct-v0-1",
      "model": "Mixtral 8x22B Instruct v0.1",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "64K",
      "contextWindowTokens": 64000,
      "displayScore": 22,
      "rankingEligible": true,
      "overallRank": 112,
      "url": "https://benchlm.ai/models/mixtral-8x22b-instruct-v0-1",
      "markdownUrl": "https://benchlm.ai/md/models/mixtral-8x22b-instruct-v0-1.md",
      "score": 22,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 113,
      "slug": "nemotron-4-15b",
      "canonicalModelKey": "nemotron-4-15b",
      "model": "Nemotron-4 15B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 22,
      "rankingEligible": true,
      "overallRank": 113,
      "url": "https://benchlm.ai/models/nemotron-4-15b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
      "score": 22,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 114,
      "slug": "moonshot-v1",
      "canonicalModelKey": "moonshot-v1",
      "model": "Moonshot v1",
      "creator": "Moonshot AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 22,
      "rankingEligible": true,
      "overallRank": 114,
      "url": "https://benchlm.ai/models/moonshot-v1",
      "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
      "score": 22,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 115,
      "slug": "nemotron-ultra-253b",
      "canonicalModelKey": "nemotron-ultra-253b",
      "model": "Nemotron Ultra 253B",
      "creator": "NVIDIA",
      "sourceType": "Open Weight",
      "reasoningType": "Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 22,
      "rankingEligible": true,
      "overallRank": 115,
      "url": "https://benchlm.ai/models/nemotron-ultra-253b",
      "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
      "score": 22,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 116,
      "slug": "glm-4-5-air",
      "canonicalModelKey": "glm-4-5-air",
      "model": "GLM-4.5-Air",
      "creator": "Z.AI",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 18,
      "rankingEligible": true,
      "overallRank": 116,
      "url": "https://benchlm.ai/models/glm-4-5-air",
      "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
      "score": 18,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 117,
      "slug": "llama-4-maverick",
      "canonicalModelKey": "llama-4-maverick",
      "model": "Llama 4 Maverick",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "1M",
      "contextWindowTokens": 1000000,
      "displayScore": 17,
      "rankingEligible": true,
      "overallRank": 117,
      "url": "https://benchlm.ai/models/llama-4-maverick",
      "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
      "score": 17,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 118,
      "slug": "gemma-3-27b",
      "canonicalModelKey": "gemma-3-27b",
      "model": "Gemma 3 27B",
      "creator": "Google",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 16,
      "rankingEligible": true,
      "overallRank": 118,
      "url": "https://benchlm.ai/models/gemma-3-27b",
      "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
      "score": 16,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 119,
      "slug": "gpt-oss-20b",
      "canonicalModelKey": "gpt-oss-20b",
      "model": "GPT-OSS 20B",
      "creator": "OpenAI",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 16,
      "rankingEligible": true,
      "overallRank": 119,
      "url": "https://benchlm.ai/models/gpt-oss-20b",
      "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
      "score": 16,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 1,
      "scoreConfidence": 1
    },
    {
      "rank": 120,
      "slug": "llama-4-behemoth",
      "canonicalModelKey": "llama-4-behemoth",
      "model": "Llama 4 Behemoth",
      "creator": "Meta",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 11,
      "rankingEligible": true,
      "overallRank": 120,
      "url": "https://benchlm.ai/models/llama-4-behemoth",
      "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
      "score": 11,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 121,
      "slug": "nova-pro",
      "canonicalModelKey": "nova-pro",
      "model": "Nova Pro",
      "creator": "Amazon",
      "sourceType": "Proprietary",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "128K",
      "contextWindowTokens": 128000,
      "displayScore": 10,
      "rankingEligible": true,
      "overallRank": 121,
      "url": "https://benchlm.ai/models/nova-pro",
      "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
      "score": 10,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 122,
      "slug": "mistral-7b-v0-3",
      "canonicalModelKey": "mistral-7b-v0-3",
      "model": "Mistral 7B v0.3",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 4,
      "rankingEligible": true,
      "overallRank": 122,
      "url": "https://benchlm.ai/models/mistral-7b-v0-3",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
      "score": 4,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    },
    {
      "rank": 123,
      "slug": "mistral-8x7b-v0-2",
      "canonicalModelKey": "mistral-8x7b-v0-2",
      "model": "Mistral 8x7B v0.2",
      "creator": "Mistral",
      "sourceType": "Open Weight",
      "reasoningType": "Non-Reasoning",
      "contextWindow": "32K",
      "contextWindowTokens": 32000,
      "displayScore": 1,
      "rankingEligible": true,
      "overallRank": 123,
      "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
      "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
      "score": 1,
      "category": null,
      "categoryLabel": null,
      "trustedBenchmarkCount": 0,
      "scoreConfidence": 1
    }
  ],
  "categories": {
    "agentic": [
      {
        "rank": 1,
        "slug": "claude-mythos-5",
        "canonicalModelKey": "claude-mythos-5",
        "model": "Claude Mythos 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 99,
        "rankingEligible": true,
        "overallRank": 1,
        "url": "https://benchlm.ai/models/claude-mythos-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-mythos-5.md",
        "score": 100,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 2,
        "slug": "claude-fable",
        "canonicalModelKey": "claude-fable-5",
        "model": "Claude Fable 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 97,
        "rankingEligible": true,
        "overallRank": 2,
        "url": "https://benchlm.ai/models/claude-fable",
        "markdownUrl": "https://benchlm.ai/md/models/claude-fable.md",
        "score": 100,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 3,
        "slug": "claude-opus-4-8",
        "canonicalModelKey": "claude-opus-4-8",
        "model": "Claude Opus 4.8",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 93,
        "rankingEligible": true,
        "overallRank": 3,
        "url": "https://benchlm.ai/models/claude-opus-4-8",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-8.md",
        "score": 96.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 4,
        "slug": "gpt-5-5",
        "canonicalModelKey": "gpt-5-5",
        "model": "GPT-5.5",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 7,
        "url": "https://benchlm.ai/models/gpt-5-5",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-5.md",
        "score": 95.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 22,
        "scoreConfidence": 3
      },
      {
        "rank": 5,
        "slug": "gemini-3-5-flash",
        "canonicalModelKey": "gemini-3-5-flash",
        "model": "Gemini 3.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 13,
        "url": "https://benchlm.ai/models/gemini-3-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-5-flash.md",
        "score": 95.1,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 20,
        "scoreConfidence": 3
      },
      {
        "rank": 6,
        "slug": "gemini-3-pro-deep-think",
        "canonicalModelKey": "gemini-3-pro-deep-think",
        "model": "Gemini 3 Pro Deep Think",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 8,
        "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
        "score": 93.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 7,
        "slug": "gpt-5-4-pro",
        "canonicalModelKey": "gpt-5-4-pro",
        "model": "GPT-5.4 Pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 90,
        "rankingEligible": true,
        "overallRank": 6,
        "url": "https://benchlm.ai/models/gpt-5-4-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4-pro.md",
        "score": 89.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 9,
        "scoreConfidence": 2
      },
      {
        "rank": 8,
        "slug": "o1-preview",
        "canonicalModelKey": "o1-preview",
        "model": "o1-preview",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 20,
        "url": "https://benchlm.ai/models/o1-preview",
        "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
        "score": 87.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 9,
        "slug": "claude-opus-4-7-adaptive",
        "canonicalModelKey": "claude-opus-4-7-max",
        "model": "Claude Opus 4.7 (Adaptive)",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 84,
        "rankingEligible": true,
        "overallRank": 16,
        "url": "https://benchlm.ai/models/claude-opus-4-7-adaptive",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-7-adaptive.md",
        "score": 86.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 18,
        "scoreConfidence": 3
      },
      {
        "rank": 10,
        "slug": "minimax-m3",
        "canonicalModelKey": "minimax-m3",
        "model": "MiniMax M3",
        "creator": "MiniMax",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 23,
        "url": "https://benchlm.ai/models/minimax-m3",
        "markdownUrl": "https://benchlm.ai/md/models/minimax-m3.md",
        "score": 85.3,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 15,
        "scoreConfidence": 2
      },
      {
        "rank": 11,
        "slug": "gpt-5-4",
        "canonicalModelKey": "gpt-5-4",
        "model": "GPT-5.4",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 10,
        "url": "https://benchlm.ai/models/gpt-5-4",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
        "score": 84.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 26,
        "scoreConfidence": 4
      },
      {
        "rank": 12,
        "slug": "kimi-2-6",
        "canonicalModelKey": "kimi-2-6",
        "model": "Kimi K2.6",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 81,
        "rankingEligible": true,
        "overallRank": 21,
        "url": "https://benchlm.ai/models/kimi-2-6",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-2-6.md",
        "score": 82.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 28,
        "scoreConfidence": 2
      },
      {
        "rank": 13,
        "slug": "gemini-3-1-pro",
        "canonicalModelKey": "gemini-3-1-pro",
        "model": "Gemini 3.1 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 4,
        "url": "https://benchlm.ai/models/gemini-3-1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
        "score": 81.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 14,
        "slug": "claude-opus-4-6",
        "canonicalModelKey": "claude-opus-4-6",
        "model": "Claude Opus 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 12,
        "url": "https://benchlm.ai/models/claude-opus-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
        "score": 81.3,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 27,
        "scoreConfidence": 4
      },
      {
        "rank": 15,
        "slug": "claude-sonnet-4-6",
        "canonicalModelKey": "claude-sonnet-4-6",
        "model": "Claude Sonnet 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 18,
        "url": "https://benchlm.ai/models/claude-sonnet-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
        "score": 78.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 16,
        "slug": "gpt-5-3-codex",
        "canonicalModelKey": "gpt-5-3-codex",
        "model": "GPT-5.3 Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 85,
        "rankingEligible": true,
        "overallRank": 15,
        "url": "https://benchlm.ai/models/gpt-5-3-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
        "score": 77.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 17,
        "slug": "glm-5-reasoning",
        "canonicalModelKey": "glm-5-reasoning",
        "model": "GLM-5 (Reasoning)",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 24,
        "url": "https://benchlm.ai/models/glm-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
        "score": 77,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 18,
        "slug": "grok-4-1",
        "canonicalModelKey": "grok-4-1",
        "model": "Grok 4.1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 9,
        "url": "https://benchlm.ai/models/grok-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
        "score": 76.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 19,
        "slug": "gpt-5-high",
        "canonicalModelKey": "gpt-5-high",
        "model": "GPT-5 (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 29,
        "url": "https://benchlm.ai/models/gpt-5-high",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
        "score": 74.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 20,
        "slug": "gpt-5-1",
        "canonicalModelKey": "gpt-5-1",
        "model": "GPT-5.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 27,
        "url": "https://benchlm.ai/models/gpt-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
        "score": 74.1,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 21,
        "slug": "claude-opus-4-5",
        "canonicalModelKey": "claude-opus-4-5",
        "model": "Claude Opus 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 28,
        "url": "https://benchlm.ai/models/claude-opus-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
        "score": 73.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 41,
        "scoreConfidence": 4
      },
      {
        "rank": 22,
        "slug": "gemini-3-pro",
        "canonicalModelKey": "gemini-3-pro",
        "model": "Gemini 3 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 80,
        "rankingEligible": true,
        "overallRank": 22,
        "url": "https://benchlm.ai/models/gemini-3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
        "score": 70.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 23,
        "slug": "qwen3-5-397b-reasoning",
        "canonicalModelKey": "qwen3-5-397b-reasoning",
        "model": "Qwen3.5 397B (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 26,
        "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
        "score": 68.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 24,
        "slug": "gpt-5-medium",
        "canonicalModelKey": "gpt-5-medium",
        "model": "GPT-5 (medium)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 70,
        "rankingEligible": true,
        "overallRank": 37,
        "url": "https://benchlm.ai/models/gpt-5-medium",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
        "score": 68.1,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 25,
        "slug": "kimi-k2-5-reasoning",
        "canonicalModelKey": "kimi-k2-5-reasoning",
        "model": "Kimi K2.5 (Reasoning)",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 31,
        "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
        "score": 62,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 26,
        "slug": "o3-mini",
        "canonicalModelKey": "o3-mini",
        "model": "o3-mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 64,
        "url": "https://benchlm.ai/models/o3-mini",
        "markdownUrl": "https://benchlm.ai/md/models/o3-mini.md",
        "score": 61.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 5,
        "scoreConfidence": 1
      },
      {
        "rank": 27,
        "slug": "deepseek-v3-2-thinking",
        "canonicalModelKey": "deepseek-v3-2-thinking",
        "model": "DeepSeek V3.2 (Thinking)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 60,
        "rankingEligible": true,
        "overallRank": 53,
        "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
        "score": 61.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 28,
        "slug": "grok-4-1-fast",
        "canonicalModelKey": "grok-4-1-fast",
        "model": "Grok 4.1 Fast",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 41,
        "url": "https://benchlm.ai/models/grok-4-1-fast",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
        "score": 61.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 29,
        "slug": "o3-pro",
        "canonicalModelKey": "o3-pro",
        "model": "o3-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 57,
        "url": "https://benchlm.ai/models/o3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
        "score": 61.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 30,
        "slug": "deepseek-coder-2-0",
        "canonicalModelKey": "deepseek-coder-2-0",
        "model": "DeepSeek Coder 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 66,
        "url": "https://benchlm.ai/models/deepseek-coder-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
        "score": 59.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 31,
        "slug": "gpt-5-2",
        "canonicalModelKey": "gpt-5-2",
        "model": "GPT-5.2",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 78,
        "rankingEligible": true,
        "overallRank": 25,
        "url": "https://benchlm.ai/models/gpt-5-2",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
        "score": 59.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 2
      },
      {
        "rank": 32,
        "slug": "gemini-2-5-pro",
        "canonicalModelKey": "gemini-2-5-pro",
        "model": "Gemini 2.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 49,
        "url": "https://benchlm.ai/models/gemini-2-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
        "score": 57.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 33,
        "slug": "mimo-v2-flash",
        "canonicalModelKey": "mimo-v2-flash",
        "model": "MiMo-V2-Flash",
        "creator": "Xiaomi",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 59,
        "rankingEligible": true,
        "overallRank": 54,
        "url": "https://benchlm.ai/models/mimo-v2-flash",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
        "score": 57.1,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 34,
        "slug": "o3",
        "canonicalModelKey": "o3",
        "model": "o3",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 61,
        "url": "https://benchlm.ai/models/o3",
        "markdownUrl": "https://benchlm.ai/md/models/o3.md",
        "score": 56.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 35,
        "slug": "o1",
        "canonicalModelKey": "o1",
        "model": "o1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 58,
        "url": "https://benchlm.ai/models/o1",
        "markdownUrl": "https://benchlm.ai/md/models/o1.md",
        "score": 56.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 36,
        "slug": "qwen3-6-plus",
        "canonicalModelKey": "qwen3-6-plus",
        "model": "Qwen3.6 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 66,
        "rankingEligible": true,
        "overallRank": 43,
        "url": "https://benchlm.ai/models/qwen3-6-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-plus.md",
        "score": 55.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 38,
        "scoreConfidence": 4
      },
      {
        "rank": 37,
        "slug": "qwen3-5-122b-a10b",
        "canonicalModelKey": "qwen3-5-122b-a10b",
        "model": "Qwen3.5-122B-A10B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 48,
        "url": "https://benchlm.ai/models/qwen3-5-122b-a10b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-122b-a10b.md",
        "score": 55.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 38,
        "slug": "gpt-4-1",
        "canonicalModelKey": "gpt-4-1",
        "model": "GPT-4.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 56,
        "url": "https://benchlm.ai/models/gpt-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1.md",
        "score": 55.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 39,
        "slug": "qwen2-5-1m",
        "canonicalModelKey": "qwen2-5-1m",
        "model": "Qwen2.5-1M",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 69,
        "url": "https://benchlm.ai/models/qwen2-5-1m",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
        "score": 54.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 40,
        "slug": "claude-sonnet-4-5",
        "canonicalModelKey": "claude-sonnet-4-5",
        "model": "Claude Sonnet 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 64,
        "rankingEligible": true,
        "overallRank": 46,
        "url": "https://benchlm.ai/models/claude-sonnet-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
        "score": 54,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 41,
        "slug": "glm-4-7",
        "canonicalModelKey": "glm-4-7",
        "model": "GLM-4.7",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 40,
        "url": "https://benchlm.ai/models/glm-4-7",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
        "score": 53.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 42,
        "slug": "qwen3-5-397b",
        "canonicalModelKey": "qwen3-5-397b",
        "model": "Qwen3.5 397B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 62,
        "rankingEligible": true,
        "overallRank": 51,
        "url": "https://benchlm.ai/models/qwen3-5-397b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
        "score": 53.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 36,
        "scoreConfidence": 4
      },
      {
        "rank": 43,
        "slug": "deepseekmath-v2",
        "canonicalModelKey": "deepseekmath-v2",
        "model": "DeepSeekMath V2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 73,
        "url": "https://benchlm.ai/models/deepseekmath-v2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
        "score": 53.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 44,
        "slug": "grok-4",
        "canonicalModelKey": "grok-4",
        "model": "Grok 4",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 50,
        "url": "https://benchlm.ai/models/grok-4",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
        "score": 53.1,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 45,
        "slug": "qwen3-5-27b",
        "canonicalModelKey": "qwen3-5-27b",
        "model": "Qwen3.5-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 61,
        "rankingEligible": true,
        "overallRank": 52,
        "url": "https://benchlm.ai/models/qwen3-5-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-27b.md",
        "score": 50.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 46,
        "slug": "deepseek-v3-2",
        "canonicalModelKey": "deepseek-v3-2",
        "model": "DeepSeek V3.2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 59,
        "url": "https://benchlm.ai/models/deepseek-v3-2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
        "score": 50,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 47,
        "slug": "deepseek-llm-2-0",
        "canonicalModelKey": "deepseek-llm-2-0",
        "model": "DeepSeek LLM 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 68,
        "url": "https://benchlm.ai/models/deepseek-llm-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
        "score": 49.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 48,
        "slug": "glm-5",
        "canonicalModelKey": "glm-5",
        "model": "GLM-5",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 67,
        "rankingEligible": true,
        "overallRank": 42,
        "url": "https://benchlm.ai/models/glm-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
        "score": 48.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 4
      },
      {
        "rank": 49,
        "slug": "qwen2-5-72b",
        "canonicalModelKey": "qwen2-5-72b",
        "model": "Qwen2.5-72B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 72,
        "url": "https://benchlm.ai/models/qwen2-5-72b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
        "score": 46.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 50,
        "slug": "kimi-k2-5",
        "canonicalModelKey": "kimi-k2-5",
        "model": "Kimi K2.5",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 47,
        "url": "https://benchlm.ai/models/kimi-k2-5",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
        "score": 45,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 39,
        "scoreConfidence": 4
      },
      {
        "rank": 51,
        "slug": "qwen3-5-35b-a3b",
        "canonicalModelKey": "qwen3-5-35b-a3b",
        "model": "Qwen3.5-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 62,
        "url": "https://benchlm.ai/models/qwen3-5-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-35b-a3b.md",
        "score": 44.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 52,
        "slug": "gemini-3-flash",
        "canonicalModelKey": "gemini-3-flash",
        "model": "Gemini 3 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 63,
        "url": "https://benchlm.ai/models/gemini-3-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
        "score": 44.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 53,
        "slug": "claude-haiku-4-5",
        "canonicalModelKey": "claude-haiku-4-5",
        "model": "Claude Haiku 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 60,
        "url": "https://benchlm.ai/models/claude-haiku-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
        "score": 44.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 54,
        "slug": "claude-4-sonnet",
        "canonicalModelKey": "claude-4-sonnet",
        "model": "Claude 4 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 70,
        "url": "https://benchlm.ai/models/claude-4-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
        "score": 43.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 55,
        "slug": "nemotron-3-super-100b",
        "canonicalModelKey": "nemotron-3-super-100b",
        "model": "Nemotron 3 Super 100B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 78,
        "url": "https://benchlm.ai/models/nemotron-3-super-100b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
        "score": 43.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 56,
        "slug": "gpt-4-1-mini",
        "canonicalModelKey": "gpt-4-1-mini",
        "model": "GPT-4.1 mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 77,
        "url": "https://benchlm.ai/models/gpt-4-1-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-mini.md",
        "score": 43.3,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 57,
        "slug": "gemini-3-1-flash-lite",
        "canonicalModelKey": "gemini-3-1-flash-lite",
        "model": "Gemini 3.1 Flash-Lite",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 47,
        "rankingEligible": true,
        "overallRank": 75,
        "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
        "score": 41.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 58,
        "slug": "grok-code-fast-1",
        "canonicalModelKey": "grok-code-fast-1",
        "model": "Grok Code Fast 1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 85,
        "url": "https://benchlm.ai/models/grok-code-fast-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
        "score": 40.1,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 59,
        "slug": "gpt-4o-mini",
        "canonicalModelKey": "gpt-4o-mini",
        "model": "GPT-4o mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 71,
        "url": "https://benchlm.ai/models/gpt-4o-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o-mini.md",
        "score": 39,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 60,
        "slug": "qwen3-235b-2507-reasoning",
        "canonicalModelKey": "qwen3-235b-2507-reasoning",
        "model": "Qwen3 235B 2507 (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 76,
        "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
        "score": 38.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 61,
        "slug": "gpt-4o",
        "canonicalModelKey": "gpt-4o",
        "model": "GPT-4o",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": true,
        "overallRank": 81,
        "url": "https://benchlm.ai/models/gpt-4o",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
        "score": 38.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 62,
        "slug": "claude-4-1-opus",
        "canonicalModelKey": "claude-4-1-opus",
        "model": "Claude 4.1 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 67,
        "url": "https://benchlm.ai/models/claude-4-1-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
        "score": 38.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 63,
        "slug": "o4-mini-high",
        "canonicalModelKey": "o4-mini-high",
        "model": "o4-mini (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 79,
        "url": "https://benchlm.ai/models/o4-mini-high",
        "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
        "score": 38.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 64,
        "slug": "claude-3-5-sonnet",
        "canonicalModelKey": "claude-3-5-sonnet",
        "model": "Claude 3.5 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 84,
        "url": "https://benchlm.ai/models/claude-3-5-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
        "score": 38.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 65,
        "slug": "mistral-large-3",
        "canonicalModelKey": "mistral-large-3",
        "model": "Mistral Large 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 48,
        "rankingEligible": true,
        "overallRank": 74,
        "url": "https://benchlm.ai/models/mistral-large-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
        "score": 37.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 66,
        "slug": "gpt-oss-120b",
        "canonicalModelKey": "gpt-oss-120b",
        "model": "GPT-OSS 120B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 91,
        "url": "https://benchlm.ai/models/gpt-oss-120b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
        "score": 34.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 67,
        "slug": "gemini-1-5-pro",
        "canonicalModelKey": "gemini-1-5-pro",
        "model": "Gemini 1.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 35,
        "rankingEligible": true,
        "overallRank": 89,
        "url": "https://benchlm.ai/models/gemini-1-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
        "score": 33.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 68,
        "slug": "claude-3-opus",
        "canonicalModelKey": "claude-3-opus",
        "model": "Claude 3 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 92,
        "url": "https://benchlm.ai/models/claude-3-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
        "score": 33.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 69,
        "slug": "gemini-2-5-flash",
        "canonicalModelKey": "gemini-2-5-flash",
        "model": "Gemini 2.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 37,
        "rankingEligible": true,
        "overallRank": 88,
        "url": "https://benchlm.ai/models/gemini-2-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
        "score": 33,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 70,
        "slug": "deepseek-v3-1-reasoning",
        "canonicalModelKey": "deepseek-v3-1-reasoning",
        "model": "DeepSeek V3.1 (Reasoning)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 29,
        "rankingEligible": true,
        "overallRank": 98,
        "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
        "score": 32.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 71,
        "slug": "mistral-large-2",
        "canonicalModelKey": "mistral-large-2",
        "model": "Mistral Large 2",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 38,
        "rankingEligible": true,
        "overallRank": 87,
        "url": "https://benchlm.ai/models/mistral-large-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
        "score": 32.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 72,
        "slug": "llama-3-1-405b",
        "canonicalModelKey": "llama-3-1-405b",
        "model": "Llama 3.1 405B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 83,
        "url": "https://benchlm.ai/models/llama-3-1-405b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
        "score": 31.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 73,
        "slug": "deepseek-r1",
        "canonicalModelKey": "deepseek-r1",
        "model": "DeepSeek-R1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 94,
        "url": "https://benchlm.ai/models/deepseek-r1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
        "score": 31.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 74,
        "slug": "deepseek-v3-1",
        "canonicalModelKey": "deepseek-v3-1",
        "model": "DeepSeek V3.1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 106,
        "url": "https://benchlm.ai/models/deepseek-v3-1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
        "score": 25.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 75,
        "slug": "claude-4-1-opus-thinking",
        "canonicalModelKey": "claude-4-1-opus-thinking",
        "model": "Claude 4.1 Opus Thinking",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 80,
        "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
        "score": 24.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 76,
        "slug": "gpt-4-turbo",
        "canonicalModelKey": "gpt-4-turbo",
        "model": "GPT-4 Turbo",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 107,
        "url": "https://benchlm.ai/models/gpt-4-turbo",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
        "score": 23.1,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 77,
        "slug": "gpt-4-1-nano",
        "canonicalModelKey": "gpt-4-1-nano",
        "model": "GPT-4.1 nano",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 101,
        "url": "https://benchlm.ai/models/gpt-4-1-nano",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-nano.md",
        "score": 22.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 78,
        "slug": "glm-4-5",
        "canonicalModelKey": "glm-4-5",
        "model": "GLM-4.5",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 102,
        "url": "https://benchlm.ai/models/glm-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
        "score": 22.9,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 79,
        "slug": "llama-3-70b",
        "canonicalModelKey": "llama-3-70b",
        "model": "Llama 3 70B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 105,
        "url": "https://benchlm.ai/models/llama-3-70b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
        "score": 22,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 80,
        "slug": "gpt-oss-20b",
        "canonicalModelKey": "gpt-oss-20b",
        "model": "GPT-OSS 20B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 119,
        "url": "https://benchlm.ai/models/gpt-oss-20b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
        "score": 21.3,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 81,
        "slug": "grok-3-beta",
        "canonicalModelKey": "grok-3-beta",
        "model": "Grok 3 [Beta]",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 30,
        "rankingEligible": true,
        "overallRank": 97,
        "url": "https://benchlm.ai/models/grok-3-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
        "score": 21,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 82,
        "slug": "nemotron-3-nano-30b",
        "canonicalModelKey": "nemotron-3-nano-30b",
        "model": "Nemotron 3 Nano 30B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 104,
        "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
        "score": 19.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 83,
        "slug": "z-1",
        "canonicalModelKey": "z-1",
        "model": "Z-1",
        "creator": "Z",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 109,
        "url": "https://benchlm.ai/models/z-1",
        "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
        "score": 19.3,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 84,
        "slug": "moonshot-v1",
        "canonicalModelKey": "moonshot-v1",
        "model": "Moonshot v1",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 114,
        "url": "https://benchlm.ai/models/moonshot-v1",
        "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
        "score": 18.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 85,
        "slug": "nemotron-ultra-253b",
        "canonicalModelKey": "nemotron-ultra-253b",
        "model": "Nemotron Ultra 253B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 115,
        "url": "https://benchlm.ai/models/nemotron-ultra-253b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
        "score": 18.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 86,
        "slug": "claude-3-haiku",
        "canonicalModelKey": "claude-3-haiku",
        "model": "Claude 3 Haiku",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 111,
        "url": "https://benchlm.ai/models/claude-3-haiku",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
        "score": 18.3,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 87,
        "slug": "glm-4-5-air",
        "canonicalModelKey": "glm-4-5-air",
        "model": "GLM-4.5-Air",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 18,
        "rankingEligible": true,
        "overallRank": 116,
        "url": "https://benchlm.ai/models/glm-4-5-air",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
        "score": 17.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 88,
        "slug": "nemotron-4-15b",
        "canonicalModelKey": "nemotron-4-15b",
        "model": "Nemotron-4 15B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 113,
        "url": "https://benchlm.ai/models/nemotron-4-15b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
        "score": 17.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 89,
        "slug": "llama-4-scout",
        "canonicalModelKey": "llama-4-scout",
        "model": "Llama 4 Scout",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "10M",
        "contextWindowTokens": 10000000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 103,
        "url": "https://benchlm.ai/models/llama-4-scout",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
        "score": 17.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 90,
        "slug": "mistral-8x7b",
        "canonicalModelKey": "mistral-8x7b",
        "model": "Mistral 8x7B",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 110,
        "url": "https://benchlm.ai/models/mistral-8x7b",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
        "score": 17.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 91,
        "slug": "qwen3-235b-2507",
        "canonicalModelKey": "qwen3-235b-2507",
        "model": "Qwen3 235B 2507",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 95,
        "url": "https://benchlm.ai/models/qwen3-235b-2507",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
        "score": 16.4,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 92,
        "slug": "o1-pro",
        "canonicalModelKey": "o1-pro",
        "model": "o1-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 28,
        "rankingEligible": true,
        "overallRank": 99,
        "url": "https://benchlm.ai/models/o1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o1-pro.md",
        "score": 16.2,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 93,
        "slug": "gemini-1-0-pro",
        "canonicalModelKey": "gemini-1-0-pro",
        "model": "Gemini 1.0 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 108,
        "url": "https://benchlm.ai/models/gemini-1-0-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
        "score": 14.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 94,
        "slug": "gemma-3-27b",
        "canonicalModelKey": "gemma-3-27b",
        "model": "Gemma 3 27B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 118,
        "url": "https://benchlm.ai/models/gemma-3-27b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
        "score": 12.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 95,
        "slug": "llama-4-maverick",
        "canonicalModelKey": "llama-4-maverick",
        "model": "Llama 4 Maverick",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 17,
        "rankingEligible": true,
        "overallRank": 117,
        "url": "https://benchlm.ai/models/llama-4-maverick",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
        "score": 12.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 96,
        "slug": "phi-4",
        "canonicalModelKey": "phi-4",
        "model": "Phi-4",
        "creator": "Microsoft",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "16K",
        "contextWindowTokens": 16000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 100,
        "url": "https://benchlm.ai/models/phi-4",
        "markdownUrl": "https://benchlm.ai/md/models/phi-4.md",
        "score": 11.7,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 97,
        "slug": "llama-4-behemoth",
        "canonicalModelKey": "llama-4-behemoth",
        "model": "Llama 4 Behemoth",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": true,
        "overallRank": 120,
        "url": "https://benchlm.ai/models/llama-4-behemoth",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
        "score": 8.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 98,
        "slug": "dbrx-instruct",
        "canonicalModelKey": "dbrx-instruct",
        "model": "DBRX Instruct",
        "creator": "Databricks",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 96,
        "url": "https://benchlm.ai/models/dbrx-instruct",
        "markdownUrl": "https://benchlm.ai/md/models/dbrx-instruct.md",
        "score": 8.5,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 99,
        "slug": "mixtral-8x22b-instruct-v0-1",
        "canonicalModelKey": "mixtral-8x22b-instruct-v0-1",
        "model": "Mixtral 8x22B Instruct v0.1",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "64K",
        "contextWindowTokens": 64000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 112,
        "url": "https://benchlm.ai/models/mixtral-8x22b-instruct-v0-1",
        "markdownUrl": "https://benchlm.ai/md/models/mixtral-8x22b-instruct-v0-1.md",
        "score": 7.6,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 100,
        "slug": "nova-pro",
        "canonicalModelKey": "nova-pro",
        "model": "Nova Pro",
        "creator": "Amazon",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 10,
        "rankingEligible": true,
        "overallRank": 121,
        "url": "https://benchlm.ai/models/nova-pro",
        "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
        "score": 4.8,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 101,
        "slug": "mistral-7b-v0-3",
        "canonicalModelKey": "mistral-7b-v0-3",
        "model": "Mistral 7B v0.3",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 4,
        "rankingEligible": true,
        "overallRank": 122,
        "url": "https://benchlm.ai/models/mistral-7b-v0-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
        "score": 0,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 102,
        "slug": "mistral-8x7b-v0-2",
        "canonicalModelKey": "mistral-8x7b-v0-2",
        "model": "Mistral 8x7B v0.2",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 1,
        "rankingEligible": true,
        "overallRank": 123,
        "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
        "score": 0,
        "category": "agentic",
        "categoryLabel": "Agentic",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      }
    ],
    "coding": [
      {
        "rank": 1,
        "slug": "claude-mythos-5",
        "canonicalModelKey": "claude-mythos-5",
        "model": "Claude Mythos 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 99,
        "rankingEligible": true,
        "overallRank": 1,
        "url": "https://benchlm.ai/models/claude-mythos-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-mythos-5.md",
        "score": 100,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 2,
        "slug": "claude-fable",
        "canonicalModelKey": "claude-fable-5",
        "model": "Claude Fable 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 97,
        "rankingEligible": true,
        "overallRank": 2,
        "url": "https://benchlm.ai/models/claude-fable",
        "markdownUrl": "https://benchlm.ai/md/models/claude-fable.md",
        "score": 100,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 3,
        "slug": "claude-opus-4-8",
        "canonicalModelKey": "claude-opus-4-8",
        "model": "Claude Opus 4.8",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 93,
        "rankingEligible": true,
        "overallRank": 3,
        "url": "https://benchlm.ai/models/claude-opus-4-8",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-8.md",
        "score": 98,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 4,
        "slug": "claude-opus-4-7-adaptive",
        "canonicalModelKey": "claude-opus-4-7-max",
        "model": "Claude Opus 4.7 (Adaptive)",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 84,
        "rankingEligible": true,
        "overallRank": 16,
        "url": "https://benchlm.ai/models/claude-opus-4-7-adaptive",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-7-adaptive.md",
        "score": 93.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 18,
        "scoreConfidence": 3
      },
      {
        "rank": 5,
        "slug": "gemini-3-1-pro",
        "canonicalModelKey": "gemini-3-1-pro",
        "model": "Gemini 3.1 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 4,
        "url": "https://benchlm.ai/models/gemini-3-1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
        "score": 93,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 6,
        "slug": "qwen3-7-max",
        "canonicalModelKey": "qwen3-7-max",
        "model": "Qwen3.7 Max",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 5,
        "url": "https://benchlm.ai/models/qwen3-7-max",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-max.md",
        "score": 91.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 3
      },
      {
        "rank": 7,
        "slug": "kimi-2-6",
        "canonicalModelKey": "kimi-2-6",
        "model": "Kimi K2.6",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 81,
        "rankingEligible": true,
        "overallRank": 21,
        "url": "https://benchlm.ai/models/kimi-2-6",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-2-6.md",
        "score": 89.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 28,
        "scoreConfidence": 2
      },
      {
        "rank": 8,
        "slug": "deepseek-v4-pro-max",
        "canonicalModelKey": "deepseek-v4-pro-max",
        "model": "DeepSeek V4 Pro (Max)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 14,
        "url": "https://benchlm.ai/models/deepseek-v4-pro-max",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-max.md",
        "score": 89.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 24,
        "scoreConfidence": 2
      },
      {
        "rank": 9,
        "slug": "qwen3-7-plus",
        "canonicalModelKey": "qwen3-7-plus",
        "model": "Qwen3.7 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 11,
        "url": "https://benchlm.ai/models/qwen3-7-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-plus.md",
        "score": 87.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 50,
        "scoreConfidence": 4
      },
      {
        "rank": 10,
        "slug": "gpt-5-4",
        "canonicalModelKey": "gpt-5-4",
        "model": "GPT-5.4",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 10,
        "url": "https://benchlm.ai/models/gpt-5-4",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
        "score": 87.2,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 26,
        "scoreConfidence": 4
      },
      {
        "rank": 11,
        "slug": "gpt-5-3-codex",
        "canonicalModelKey": "gpt-5-3-codex",
        "model": "GPT-5.3 Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 85,
        "rankingEligible": true,
        "overallRank": 15,
        "url": "https://benchlm.ai/models/gpt-5-3-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
        "score": 87.1,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 12,
        "slug": "deepseek-v4-pro-high",
        "canonicalModelKey": "deepseek-v4-pro-high",
        "model": "DeepSeek V4 Pro (High)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 19,
        "url": "https://benchlm.ai/models/deepseek-v4-pro-high",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-high.md",
        "score": 86,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 13,
        "slug": "claude-opus-4-6",
        "canonicalModelKey": "claude-opus-4-6",
        "model": "Claude Opus 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 12,
        "url": "https://benchlm.ai/models/claude-opus-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
        "score": 85.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 27,
        "scoreConfidence": 4
      },
      {
        "rank": 14,
        "slug": "kimi-k2-5-reasoning",
        "canonicalModelKey": "kimi-k2-5-reasoning",
        "model": "Kimi K2.5 (Reasoning)",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 31,
        "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
        "score": 85,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 15,
        "slug": "mai-thinking-1",
        "canonicalModelKey": "mai-thinking-1",
        "model": "MAI-Thinking-1",
        "creator": "Microsoft",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 65,
        "rankingEligible": true,
        "overallRank": 44,
        "url": "https://benchlm.ai/models/mai-thinking-1",
        "markdownUrl": "https://benchlm.ai/md/models/mai-thinking-1.md",
        "score": 84.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 16,
        "slug": "glm-5-1",
        "canonicalModelKey": "glm-5-1",
        "model": "GLM-5.1",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "203K",
        "contextWindowTokens": 203000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 17,
        "url": "https://benchlm.ai/models/glm-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-1.md",
        "score": 82.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 16,
        "scoreConfidence": 3
      },
      {
        "rank": 17,
        "slug": "deepseek-v4-flash-high",
        "canonicalModelKey": "deepseek-v4-flash-high",
        "model": "DeepSeek V4 Flash (High)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 71,
        "rankingEligible": true,
        "overallRank": 36,
        "url": "https://benchlm.ai/models/deepseek-v4-flash-high",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-high.md",
        "score": 82.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 18,
        "slug": "deepseek-v4-flash-max",
        "canonicalModelKey": "deepseek-v4-flash-max",
        "model": "DeepSeek V4 Flash (Max)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 74,
        "rankingEligible": true,
        "overallRank": 33,
        "url": "https://benchlm.ai/models/deepseek-v4-flash-max",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-max.md",
        "score": 82.2,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 19,
        "slug": "claude-sonnet-4-6",
        "canonicalModelKey": "claude-sonnet-4-6",
        "model": "Claude Sonnet 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 18,
        "url": "https://benchlm.ai/models/claude-sonnet-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
        "score": 82,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 20,
        "slug": "grok-4-1",
        "canonicalModelKey": "grok-4-1",
        "model": "Grok 4.1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 9,
        "url": "https://benchlm.ai/models/grok-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
        "score": 81.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 21,
        "slug": "kimi-k2-5",
        "canonicalModelKey": "kimi-k2-5",
        "model": "Kimi K2.5",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 47,
        "url": "https://benchlm.ai/models/kimi-k2-5",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
        "score": 81,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 39,
        "scoreConfidence": 4
      },
      {
        "rank": 22,
        "slug": "nemotron-3-ultra",
        "canonicalModelKey": "nemotron-3-ultra-500b",
        "model": "Nemotron 3 Ultra",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 38,
        "url": "https://benchlm.ai/models/nemotron-3-ultra",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-ultra.md",
        "score": 80.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 18,
        "scoreConfidence": 3
      },
      {
        "rank": 23,
        "slug": "o1-preview",
        "canonicalModelKey": "o1-preview",
        "model": "o1-preview",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 20,
        "url": "https://benchlm.ai/models/o1-preview",
        "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
        "score": 78.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 24,
        "slug": "gpt-5-2",
        "canonicalModelKey": "gpt-5-2",
        "model": "GPT-5.2",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 78,
        "rankingEligible": true,
        "overallRank": 25,
        "url": "https://benchlm.ai/models/gpt-5-2",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
        "score": 78.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 2
      },
      {
        "rank": 25,
        "slug": "claude-sonnet-4-5",
        "canonicalModelKey": "claude-sonnet-4-5",
        "model": "Claude Sonnet 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 64,
        "rankingEligible": true,
        "overallRank": 46,
        "url": "https://benchlm.ai/models/claude-sonnet-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
        "score": 78.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 26,
        "slug": "qwen3-6-27b",
        "canonicalModelKey": "qwen3-6-27b",
        "model": "Qwen3.6-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 72,
        "rankingEligible": true,
        "overallRank": 34,
        "url": "https://benchlm.ai/models/qwen3-6-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-27b.md",
        "score": 77.9,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 37,
        "scoreConfidence": 2
      },
      {
        "rank": 27,
        "slug": "gpt-5-medium",
        "canonicalModelKey": "gpt-5-medium",
        "model": "GPT-5 (medium)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 70,
        "rankingEligible": true,
        "overallRank": 37,
        "url": "https://benchlm.ai/models/gpt-5-medium",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
        "score": 77.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 28,
        "slug": "qwen3-6-plus",
        "canonicalModelKey": "qwen3-6-plus",
        "model": "Qwen3.6 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 66,
        "rankingEligible": true,
        "overallRank": 43,
        "url": "https://benchlm.ai/models/qwen3-6-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-plus.md",
        "score": 77.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 38,
        "scoreConfidence": 4
      },
      {
        "rank": 29,
        "slug": "gpt-5-2-codex",
        "canonicalModelKey": "gpt-5-2-codex",
        "model": "GPT-5.2-Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 30,
        "url": "https://benchlm.ai/models/gpt-5-2-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
        "score": 77.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 30,
        "slug": "gemini-3-5-flash",
        "canonicalModelKey": "gemini-3-5-flash",
        "model": "Gemini 3.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 13,
        "url": "https://benchlm.ai/models/gemini-3-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-5-flash.md",
        "score": 77.3,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 20,
        "scoreConfidence": 3
      },
      {
        "rank": 31,
        "slug": "glm-5",
        "canonicalModelKey": "glm-5",
        "model": "GLM-5",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 67,
        "rankingEligible": true,
        "overallRank": 42,
        "url": "https://benchlm.ai/models/glm-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
        "score": 76.2,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 4
      },
      {
        "rank": 32,
        "slug": "gpt-5-1",
        "canonicalModelKey": "gpt-5-1",
        "model": "GPT-5.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 27,
        "url": "https://benchlm.ai/models/gpt-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
        "score": 76,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 33,
        "slug": "grok-4",
        "canonicalModelKey": "grok-4",
        "model": "Grok 4",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 50,
        "url": "https://benchlm.ai/models/grok-4",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
        "score": 76,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 34,
        "slug": "claude-opus-4-5",
        "canonicalModelKey": "claude-opus-4-5",
        "model": "Claude Opus 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 28,
        "url": "https://benchlm.ai/models/claude-opus-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
        "score": 75.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 41,
        "scoreConfidence": 4
      },
      {
        "rank": 35,
        "slug": "gemini-3-pro-deep-think",
        "canonicalModelKey": "gemini-3-pro-deep-think",
        "model": "Gemini 3 Pro Deep Think",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 8,
        "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
        "score": 73.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 36,
        "slug": "glm-4-7",
        "canonicalModelKey": "glm-4-7",
        "model": "GLM-4.7",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 40,
        "url": "https://benchlm.ai/models/glm-4-7",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
        "score": 72.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 37,
        "slug": "gemini-3-pro",
        "canonicalModelKey": "gemini-3-pro",
        "model": "Gemini 3 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 80,
        "rankingEligible": true,
        "overallRank": 22,
        "url": "https://benchlm.ai/models/gemini-3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
        "score": 72.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 38,
        "slug": "qwen3-6-35b-a3b",
        "canonicalModelKey": "qwen3-6-35b-a3b",
        "model": "Qwen3.6-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 65,
        "rankingEligible": true,
        "overallRank": 45,
        "url": "https://benchlm.ai/models/qwen3-6-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-35b-a3b.md",
        "score": 72.2,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 40,
        "scoreConfidence": 2
      },
      {
        "rank": 39,
        "slug": "qwen3-5-27b",
        "canonicalModelKey": "qwen3-5-27b",
        "model": "Qwen3.5-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 61,
        "rankingEligible": true,
        "overallRank": 52,
        "url": "https://benchlm.ai/models/qwen3-5-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-27b.md",
        "score": 70.3,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 40,
        "slug": "gpt-5-high",
        "canonicalModelKey": "gpt-5-high",
        "model": "GPT-5 (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 29,
        "url": "https://benchlm.ai/models/gpt-5-high",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
        "score": 69.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 41,
        "slug": "glm-5-reasoning",
        "canonicalModelKey": "glm-5-reasoning",
        "model": "GLM-5 (Reasoning)",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 24,
        "url": "https://benchlm.ai/models/glm-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
        "score": 69.1,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 42,
        "slug": "mimo-v2-flash",
        "canonicalModelKey": "mimo-v2-flash",
        "model": "MiMo-V2-Flash",
        "creator": "Xiaomi",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 59,
        "rankingEligible": true,
        "overallRank": 54,
        "url": "https://benchlm.ai/models/mimo-v2-flash",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
        "score": 68.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 43,
        "slug": "deepseek-v4-pro",
        "canonicalModelKey": "deepseek-v4-pro",
        "model": "DeepSeek V4 Pro",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 39,
        "url": "https://benchlm.ai/models/deepseek-v4-pro",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro.md",
        "score": 68.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 21,
        "scoreConfidence": 2
      },
      {
        "rank": 44,
        "slug": "qwen3-5-397b",
        "canonicalModelKey": "qwen3-5-397b",
        "model": "Qwen3.5 397B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 62,
        "rankingEligible": true,
        "overallRank": 51,
        "url": "https://benchlm.ai/models/qwen3-5-397b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
        "score": 67.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 36,
        "scoreConfidence": 4
      },
      {
        "rank": 45,
        "slug": "o3",
        "canonicalModelKey": "o3",
        "model": "o3",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 61,
        "url": "https://benchlm.ai/models/o3",
        "markdownUrl": "https://benchlm.ai/md/models/o3.md",
        "score": 65.1,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 46,
        "slug": "deepseek-v4-flash",
        "canonicalModelKey": "deepseek-v4-flash",
        "model": "DeepSeek V4 Flash",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 55,
        "url": "https://benchlm.ai/models/deepseek-v4-flash",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash.md",
        "score": 62,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 21,
        "scoreConfidence": 2
      },
      {
        "rank": 47,
        "slug": "grok-code-fast-1",
        "canonicalModelKey": "grok-code-fast-1",
        "model": "Grok Code Fast 1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 85,
        "url": "https://benchlm.ai/models/grok-code-fast-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
        "score": 61.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 48,
        "slug": "deepseek-coder-2-0",
        "canonicalModelKey": "deepseek-coder-2-0",
        "model": "DeepSeek Coder 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 66,
        "url": "https://benchlm.ai/models/deepseek-coder-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
        "score": 60.9,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 49,
        "slug": "grok-4-1-fast",
        "canonicalModelKey": "grok-4-1-fast",
        "model": "Grok 4.1 Fast",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 41,
        "url": "https://benchlm.ai/models/grok-4-1-fast",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
        "score": 59.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 50,
        "slug": "qwen3-5-35b-a3b",
        "canonicalModelKey": "qwen3-5-35b-a3b",
        "model": "Qwen3.5-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 62,
        "url": "https://benchlm.ai/models/qwen3-5-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-35b-a3b.md",
        "score": 59.1,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 51,
        "slug": "deepseek-v3-2",
        "canonicalModelKey": "deepseek-v3-2",
        "model": "DeepSeek V3.2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 59,
        "url": "https://benchlm.ai/models/deepseek-v3-2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
        "score": 57.3,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 52,
        "slug": "claude-4-1-opus",
        "canonicalModelKey": "claude-4-1-opus",
        "model": "Claude 4.1 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 67,
        "url": "https://benchlm.ai/models/claude-4-1-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
        "score": 56.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 53,
        "slug": "o3-pro",
        "canonicalModelKey": "o3-pro",
        "model": "o3-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 57,
        "url": "https://benchlm.ai/models/o3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
        "score": 54.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 54,
        "slug": "minimax-m2-7",
        "canonicalModelKey": "minimax-m2-7",
        "model": "MiniMax M2.7",
        "creator": "MiniMax",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 53,
        "rankingEligible": true,
        "overallRank": 65,
        "url": "https://benchlm.ai/models/minimax-m2-7",
        "markdownUrl": "https://benchlm.ai/md/models/minimax-m2-7.md",
        "score": 54,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 2
      },
      {
        "rank": 55,
        "slug": "claude-4-sonnet",
        "canonicalModelKey": "claude-4-sonnet",
        "model": "Claude 4 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 70,
        "url": "https://benchlm.ai/models/claude-4-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
        "score": 53.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 56,
        "slug": "claude-haiku-4-5",
        "canonicalModelKey": "claude-haiku-4-5",
        "model": "Claude Haiku 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 60,
        "url": "https://benchlm.ai/models/claude-haiku-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
        "score": 53.2,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 57,
        "slug": "deepseek-v3-2-thinking",
        "canonicalModelKey": "deepseek-v3-2-thinking",
        "model": "DeepSeek V3.2 (Thinking)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 60,
        "rankingEligible": true,
        "overallRank": 53,
        "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
        "score": 51.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 58,
        "slug": "deepseekmath-v2",
        "canonicalModelKey": "deepseekmath-v2",
        "model": "DeepSeekMath V2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 73,
        "url": "https://benchlm.ai/models/deepseekmath-v2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
        "score": 51.3,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 59,
        "slug": "claude-4-1-opus-thinking",
        "canonicalModelKey": "claude-4-1-opus-thinking",
        "model": "Claude 4.1 Opus Thinking",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 80,
        "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
        "score": 51.3,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 60,
        "slug": "qwen2-5-1m",
        "canonicalModelKey": "qwen2-5-1m",
        "model": "Qwen2.5-1M",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 69,
        "url": "https://benchlm.ai/models/qwen2-5-1m",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
        "score": 48.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 61,
        "slug": "gemini-2-5-pro",
        "canonicalModelKey": "gemini-2-5-pro",
        "model": "Gemini 2.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 49,
        "url": "https://benchlm.ai/models/gemini-2-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
        "score": 48.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 62,
        "slug": "gemini-3-flash",
        "canonicalModelKey": "gemini-3-flash",
        "model": "Gemini 3 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 63,
        "url": "https://benchlm.ai/models/gemini-3-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
        "score": 47.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 63,
        "slug": "qwen2-5-72b",
        "canonicalModelKey": "qwen2-5-72b",
        "model": "Qwen2.5-72B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 72,
        "url": "https://benchlm.ai/models/qwen2-5-72b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
        "score": 46.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 64,
        "slug": "deepseek-llm-2-0",
        "canonicalModelKey": "deepseek-llm-2-0",
        "model": "DeepSeek LLM 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 68,
        "url": "https://benchlm.ai/models/deepseek-llm-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
        "score": 45.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 65,
        "slug": "o4-mini-high",
        "canonicalModelKey": "o4-mini-high",
        "model": "o4-mini (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 79,
        "url": "https://benchlm.ai/models/o4-mini-high",
        "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
        "score": 45.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 66,
        "slug": "nemotron-3-super-100b",
        "canonicalModelKey": "nemotron-3-super-100b",
        "model": "Nemotron 3 Super 100B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 78,
        "url": "https://benchlm.ai/models/nemotron-3-super-100b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
        "score": 43,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 67,
        "slug": "qwen3-235b-2507-reasoning",
        "canonicalModelKey": "qwen3-235b-2507-reasoning",
        "model": "Qwen3 235B 2507 (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 76,
        "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
        "score": 41.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 68,
        "slug": "claude-3-5-sonnet",
        "canonicalModelKey": "claude-3-5-sonnet",
        "model": "Claude 3.5 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 84,
        "url": "https://benchlm.ai/models/claude-3-5-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
        "score": 40.3,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 69,
        "slug": "mistral-large-3",
        "canonicalModelKey": "mistral-large-3",
        "model": "Mistral Large 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 48,
        "rankingEligible": true,
        "overallRank": 74,
        "url": "https://benchlm.ai/models/mistral-large-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
        "score": 37.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 70,
        "slug": "llama-3-1-405b",
        "canonicalModelKey": "llama-3-1-405b",
        "model": "Llama 3.1 405B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 83,
        "url": "https://benchlm.ai/models/llama-3-1-405b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
        "score": 36.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 71,
        "slug": "mistral-large-2",
        "canonicalModelKey": "mistral-large-2",
        "model": "Mistral Large 2",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 38,
        "rankingEligible": true,
        "overallRank": 87,
        "url": "https://benchlm.ai/models/mistral-large-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
        "score": 32.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 72,
        "slug": "grok-3-beta",
        "canonicalModelKey": "grok-3-beta",
        "model": "Grok 3 [Beta]",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 30,
        "rankingEligible": true,
        "overallRank": 97,
        "url": "https://benchlm.ai/models/grok-3-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
        "score": 28.2,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 73,
        "slug": "nemotron-ultra-253b",
        "canonicalModelKey": "nemotron-ultra-253b",
        "model": "Nemotron Ultra 253B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 115,
        "url": "https://benchlm.ai/models/nemotron-ultra-253b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
        "score": 26.9,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 74,
        "slug": "deepseek-r1",
        "canonicalModelKey": "deepseek-r1",
        "model": "DeepSeek-R1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 94,
        "url": "https://benchlm.ai/models/deepseek-r1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
        "score": 26.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 75,
        "slug": "gemini-3-1-flash-lite",
        "canonicalModelKey": "gemini-3-1-flash-lite",
        "model": "Gemini 3.1 Flash-Lite",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 47,
        "rankingEligible": true,
        "overallRank": 75,
        "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
        "score": 25.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 76,
        "slug": "gpt-4o",
        "canonicalModelKey": "gpt-4o",
        "model": "GPT-4o",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": true,
        "overallRank": 81,
        "url": "https://benchlm.ai/models/gpt-4o",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
        "score": 24.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 77,
        "slug": "qwen3-235b-2507",
        "canonicalModelKey": "qwen3-235b-2507",
        "model": "Qwen3 235B 2507",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 95,
        "url": "https://benchlm.ai/models/qwen3-235b-2507",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
        "score": 22.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 78,
        "slug": "gpt-oss-120b",
        "canonicalModelKey": "gpt-oss-120b",
        "model": "GPT-OSS 120B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 91,
        "url": "https://benchlm.ai/models/gpt-oss-120b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
        "score": 21.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 79,
        "slug": "z-1",
        "canonicalModelKey": "z-1",
        "model": "Z-1",
        "creator": "Z",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 109,
        "url": "https://benchlm.ai/models/z-1",
        "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
        "score": 21.3,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 80,
        "slug": "moonshot-v1",
        "canonicalModelKey": "moonshot-v1",
        "model": "Moonshot v1",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 114,
        "url": "https://benchlm.ai/models/moonshot-v1",
        "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
        "score": 21.2,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 81,
        "slug": "deepseek-v3-1-reasoning",
        "canonicalModelKey": "deepseek-v3-1-reasoning",
        "model": "DeepSeek V3.1 (Reasoning)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 29,
        "rankingEligible": true,
        "overallRank": 98,
        "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
        "score": 21,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 82,
        "slug": "nemotron-4-15b",
        "canonicalModelKey": "nemotron-4-15b",
        "model": "Nemotron-4 15B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 113,
        "url": "https://benchlm.ai/models/nemotron-4-15b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
        "score": 20.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 83,
        "slug": "mistral-8x7b",
        "canonicalModelKey": "mistral-8x7b",
        "model": "Mistral 8x7B",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 110,
        "url": "https://benchlm.ai/models/mistral-8x7b",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
        "score": 18.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 84,
        "slug": "nemotron-3-nano-30b",
        "canonicalModelKey": "nemotron-3-nano-30b",
        "model": "Nemotron 3 Nano 30B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 104,
        "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
        "score": 16.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 85,
        "slug": "gemini-2-5-flash",
        "canonicalModelKey": "gemini-2-5-flash",
        "model": "Gemini 2.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 37,
        "rankingEligible": true,
        "overallRank": 88,
        "url": "https://benchlm.ai/models/gemini-2-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
        "score": 16.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 86,
        "slug": "deepseek-v3-1",
        "canonicalModelKey": "deepseek-v3-1",
        "model": "DeepSeek V3.1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 106,
        "url": "https://benchlm.ai/models/deepseek-v3-1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
        "score": 15.9,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 87,
        "slug": "gemini-1-5-pro",
        "canonicalModelKey": "gemini-1-5-pro",
        "model": "Gemini 1.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 35,
        "rankingEligible": true,
        "overallRank": 89,
        "url": "https://benchlm.ai/models/gemini-1-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
        "score": 14.9,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 88,
        "slug": "claude-3-opus",
        "canonicalModelKey": "claude-3-opus",
        "model": "Claude 3 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 92,
        "url": "https://benchlm.ai/models/claude-3-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
        "score": 13.4,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 89,
        "slug": "gpt-4-turbo",
        "canonicalModelKey": "gpt-4-turbo",
        "model": "GPT-4 Turbo",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 107,
        "url": "https://benchlm.ai/models/gpt-4-turbo",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
        "score": 12.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 90,
        "slug": "glm-4-5-air",
        "canonicalModelKey": "glm-4-5-air",
        "model": "GLM-4.5-Air",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 18,
        "rankingEligible": true,
        "overallRank": 116,
        "url": "https://benchlm.ai/models/glm-4-5-air",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
        "score": 12.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 91,
        "slug": "gpt-oss-20b",
        "canonicalModelKey": "gpt-oss-20b",
        "model": "GPT-OSS 20B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 119,
        "url": "https://benchlm.ai/models/gpt-oss-20b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
        "score": 9.3,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 92,
        "slug": "nova-pro",
        "canonicalModelKey": "nova-pro",
        "model": "Nova Pro",
        "creator": "Amazon",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 10,
        "rankingEligible": true,
        "overallRank": 121,
        "url": "https://benchlm.ai/models/nova-pro",
        "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
        "score": 8.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 93,
        "slug": "llama-4-maverick",
        "canonicalModelKey": "llama-4-maverick",
        "model": "Llama 4 Maverick",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 17,
        "rankingEligible": true,
        "overallRank": 117,
        "url": "https://benchlm.ai/models/llama-4-maverick",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
        "score": 8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 94,
        "slug": "glm-4-5",
        "canonicalModelKey": "glm-4-5",
        "model": "GLM-4.5",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 102,
        "url": "https://benchlm.ai/models/glm-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
        "score": 7.8,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 95,
        "slug": "claude-3-haiku",
        "canonicalModelKey": "claude-3-haiku",
        "model": "Claude 3 Haiku",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 111,
        "url": "https://benchlm.ai/models/claude-3-haiku",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
        "score": 7.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 96,
        "slug": "gemma-3-27b",
        "canonicalModelKey": "gemma-3-27b",
        "model": "Gemma 3 27B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 118,
        "url": "https://benchlm.ai/models/gemma-3-27b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
        "score": 5.5,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 97,
        "slug": "llama-4-behemoth",
        "canonicalModelKey": "llama-4-behemoth",
        "model": "Llama 4 Behemoth",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": true,
        "overallRank": 120,
        "url": "https://benchlm.ai/models/llama-4-behemoth",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
        "score": 3.9,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 98,
        "slug": "llama-3-70b",
        "canonicalModelKey": "llama-3-70b",
        "model": "Llama 3 70B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 105,
        "url": "https://benchlm.ai/models/llama-3-70b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
        "score": 2.6,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 99,
        "slug": "mistral-8x7b-v0-2",
        "canonicalModelKey": "mistral-8x7b-v0-2",
        "model": "Mistral 8x7B v0.2",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 1,
        "rankingEligible": true,
        "overallRank": 123,
        "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
        "score": 1,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 100,
        "slug": "mistral-7b-v0-3",
        "canonicalModelKey": "mistral-7b-v0-3",
        "model": "Mistral 7B v0.3",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 4,
        "rankingEligible": true,
        "overallRank": 122,
        "url": "https://benchlm.ai/models/mistral-7b-v0-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
        "score": 0.7,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 101,
        "slug": "llama-4-scout",
        "canonicalModelKey": "llama-4-scout",
        "model": "Llama 4 Scout",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "10M",
        "contextWindowTokens": 10000000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 103,
        "url": "https://benchlm.ai/models/llama-4-scout",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
        "score": 0,
        "category": "coding",
        "categoryLabel": "Coding",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      }
    ],
    "reasoning": [
      {
        "rank": 1,
        "slug": "grok-4-1",
        "canonicalModelKey": "grok-4-1",
        "model": "Grok 4.1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 9,
        "url": "https://benchlm.ai/models/grok-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
        "score": 97.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 2,
        "slug": "gemini-3-1-pro",
        "canonicalModelKey": "gemini-3-1-pro",
        "model": "Gemini 3.1 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 4,
        "url": "https://benchlm.ai/models/gemini-3-1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
        "score": 96.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 3,
        "slug": "gpt-5-4",
        "canonicalModelKey": "gpt-5-4",
        "model": "GPT-5.4",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 10,
        "url": "https://benchlm.ai/models/gpt-5-4",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
        "score": 95.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 26,
        "scoreConfidence": 4
      },
      {
        "rank": 4,
        "slug": "gpt-5-3-codex",
        "canonicalModelKey": "gpt-5-3-codex",
        "model": "GPT-5.3 Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 85,
        "rankingEligible": true,
        "overallRank": 15,
        "url": "https://benchlm.ai/models/gpt-5-3-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
        "score": 93,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 5,
        "slug": "o1-preview",
        "canonicalModelKey": "o1-preview",
        "model": "o1-preview",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 20,
        "url": "https://benchlm.ai/models/o1-preview",
        "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
        "score": 88.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 6,
        "slug": "gemini-3-pro-deep-think",
        "canonicalModelKey": "gemini-3-pro-deep-think",
        "model": "Gemini 3 Pro Deep Think",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 8,
        "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
        "score": 88.5,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 7,
        "slug": "gpt-5-1-codex-max",
        "canonicalModelKey": "gpt-5-1-codex-max",
        "model": "GPT-5.1-Codex-Max",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 32,
        "url": "https://benchlm.ai/models/gpt-5-1-codex-max",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex-max.md",
        "score": 88.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 8,
        "slug": "claude-opus-4-6",
        "canonicalModelKey": "claude-opus-4-6",
        "model": "Claude Opus 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 12,
        "url": "https://benchlm.ai/models/claude-opus-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
        "score": 88.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 27,
        "scoreConfidence": 4
      },
      {
        "rank": 9,
        "slug": "grok-4-1-fast",
        "canonicalModelKey": "grok-4-1-fast",
        "model": "Grok 4.1 Fast",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 41,
        "url": "https://benchlm.ai/models/grok-4-1-fast",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
        "score": 87.9,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 10,
        "slug": "gpt-5-2-codex",
        "canonicalModelKey": "gpt-5-2-codex",
        "model": "GPT-5.2-Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 30,
        "url": "https://benchlm.ai/models/gpt-5-2-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
        "score": 87.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 11,
        "slug": "glm-5-reasoning",
        "canonicalModelKey": "glm-5-reasoning",
        "model": "GLM-5 (Reasoning)",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 24,
        "url": "https://benchlm.ai/models/glm-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
        "score": 87.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 12,
        "slug": "gpt-5-2",
        "canonicalModelKey": "gpt-5-2",
        "model": "GPT-5.2",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 78,
        "rankingEligible": true,
        "overallRank": 25,
        "url": "https://benchlm.ai/models/gpt-5-2",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
        "score": 83.5,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 2
      },
      {
        "rank": 13,
        "slug": "claude-sonnet-4-6",
        "canonicalModelKey": "claude-sonnet-4-6",
        "model": "Claude Sonnet 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 18,
        "url": "https://benchlm.ai/models/claude-sonnet-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
        "score": 82.9,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 14,
        "slug": "gemini-3-pro",
        "canonicalModelKey": "gemini-3-pro",
        "model": "Gemini 3 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 80,
        "rankingEligible": true,
        "overallRank": 22,
        "url": "https://benchlm.ai/models/gemini-3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
        "score": 82.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 15,
        "slug": "qwen3-5-397b-reasoning",
        "canonicalModelKey": "qwen3-5-397b-reasoning",
        "model": "Qwen3.5 397B (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 26,
        "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
        "score": 81.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 16,
        "slug": "gpt-4-1-mini",
        "canonicalModelKey": "gpt-4-1-mini",
        "model": "GPT-4.1 mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 77,
        "url": "https://benchlm.ai/models/gpt-4-1-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-mini.md",
        "score": 80.3,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 17,
        "slug": "gemini-3-5-flash",
        "canonicalModelKey": "gemini-3-5-flash",
        "model": "Gemini 3.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 13,
        "url": "https://benchlm.ai/models/gemini-3-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-5-flash.md",
        "score": 79.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 20,
        "scoreConfidence": 3
      },
      {
        "rank": 18,
        "slug": "gpt-4-1",
        "canonicalModelKey": "gpt-4-1",
        "model": "GPT-4.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 56,
        "url": "https://benchlm.ai/models/gpt-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1.md",
        "score": 76.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 19,
        "slug": "gpt-5-high",
        "canonicalModelKey": "gpt-5-high",
        "model": "GPT-5 (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 29,
        "url": "https://benchlm.ai/models/gpt-5-high",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
        "score": 76.3,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 20,
        "slug": "o1",
        "canonicalModelKey": "o1",
        "model": "o1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 58,
        "url": "https://benchlm.ai/models/o1",
        "markdownUrl": "https://benchlm.ai/md/models/o1.md",
        "score": 75,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 21,
        "slug": "gpt-5-medium",
        "canonicalModelKey": "gpt-5-medium",
        "model": "GPT-5 (medium)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 70,
        "rankingEligible": true,
        "overallRank": 37,
        "url": "https://benchlm.ai/models/gpt-5-medium",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
        "score": 74.1,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 22,
        "slug": "glm-4-7",
        "canonicalModelKey": "glm-4-7",
        "model": "GLM-4.7",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 40,
        "url": "https://benchlm.ai/models/glm-4-7",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
        "score": 72.9,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 23,
        "slug": "o3-pro",
        "canonicalModelKey": "o3-pro",
        "model": "o3-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 57,
        "url": "https://benchlm.ai/models/o3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
        "score": 70.9,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 24,
        "slug": "qwen2-5-1m",
        "canonicalModelKey": "qwen2-5-1m",
        "model": "Qwen2.5-1M",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 69,
        "url": "https://benchlm.ai/models/qwen2-5-1m",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
        "score": 70.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 25,
        "slug": "claude-opus-4-5",
        "canonicalModelKey": "claude-opus-4-5",
        "model": "Claude Opus 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 28,
        "url": "https://benchlm.ai/models/claude-opus-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
        "score": 69.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 41,
        "scoreConfidence": 4
      },
      {
        "rank": 26,
        "slug": "kimi-k2-5-reasoning",
        "canonicalModelKey": "kimi-k2-5-reasoning",
        "model": "Kimi K2.5 (Reasoning)",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 31,
        "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
        "score": 69.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 27,
        "slug": "o3-mini",
        "canonicalModelKey": "o3-mini",
        "model": "o3-mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 64,
        "url": "https://benchlm.ai/models/o3-mini",
        "markdownUrl": "https://benchlm.ai/md/models/o3-mini.md",
        "score": 68,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 5,
        "scoreConfidence": 1
      },
      {
        "rank": 28,
        "slug": "gpt-4-1-nano",
        "canonicalModelKey": "gpt-4-1-nano",
        "model": "GPT-4.1 nano",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 101,
        "url": "https://benchlm.ai/models/gpt-4-1-nano",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-nano.md",
        "score": 67.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 29,
        "slug": "gpt-5-1",
        "canonicalModelKey": "gpt-5-1",
        "model": "GPT-5.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 27,
        "url": "https://benchlm.ai/models/gpt-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
        "score": 66.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 30,
        "slug": "gemini-3-flash",
        "canonicalModelKey": "gemini-3-flash",
        "model": "Gemini 3 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 63,
        "url": "https://benchlm.ai/models/gemini-3-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
        "score": 66.1,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 31,
        "slug": "glm-5-1",
        "canonicalModelKey": "glm-5-1",
        "model": "GLM-5.1",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "203K",
        "contextWindowTokens": 203000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 17,
        "url": "https://benchlm.ai/models/glm-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-1.md",
        "score": 64.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 16,
        "scoreConfidence": 3
      },
      {
        "rank": 32,
        "slug": "claude-sonnet-4-5",
        "canonicalModelKey": "claude-sonnet-4-5",
        "model": "Claude Sonnet 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 64,
        "rankingEligible": true,
        "overallRank": 46,
        "url": "https://benchlm.ai/models/claude-sonnet-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
        "score": 62,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 33,
        "slug": "glm-5",
        "canonicalModelKey": "glm-5",
        "model": "GLM-5",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 67,
        "rankingEligible": true,
        "overallRank": 42,
        "url": "https://benchlm.ai/models/glm-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
        "score": 60.5,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 4
      },
      {
        "rank": 34,
        "slug": "deepseekmath-v2",
        "canonicalModelKey": "deepseekmath-v2",
        "model": "DeepSeekMath V2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 73,
        "url": "https://benchlm.ai/models/deepseekmath-v2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
        "score": 60.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 35,
        "slug": "qwen2-5-72b",
        "canonicalModelKey": "qwen2-5-72b",
        "model": "Qwen2.5-72B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 72,
        "url": "https://benchlm.ai/models/qwen2-5-72b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
        "score": 60.1,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 36,
        "slug": "deepseek-coder-2-0",
        "canonicalModelKey": "deepseek-coder-2-0",
        "model": "DeepSeek Coder 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 66,
        "url": "https://benchlm.ai/models/deepseek-coder-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
        "score": 59.5,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 37,
        "slug": "gemini-2-5-pro",
        "canonicalModelKey": "gemini-2-5-pro",
        "model": "Gemini 2.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 49,
        "url": "https://benchlm.ai/models/gemini-2-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
        "score": 59,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 38,
        "slug": "gemini-3-1-flash-lite",
        "canonicalModelKey": "gemini-3-1-flash-lite",
        "model": "Gemini 3.1 Flash-Lite",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 47,
        "rankingEligible": true,
        "overallRank": 75,
        "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
        "score": 58.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 39,
        "slug": "claude-haiku-4-5",
        "canonicalModelKey": "claude-haiku-4-5",
        "model": "Claude Haiku 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 60,
        "url": "https://benchlm.ai/models/claude-haiku-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
        "score": 58.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 40,
        "slug": "qwen3-5-397b",
        "canonicalModelKey": "qwen3-5-397b",
        "model": "Qwen3.5 397B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 62,
        "rankingEligible": true,
        "overallRank": 51,
        "url": "https://benchlm.ai/models/qwen3-5-397b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
        "score": 58.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 36,
        "scoreConfidence": 4
      },
      {
        "rank": 41,
        "slug": "deepseek-llm-2-0",
        "canonicalModelKey": "deepseek-llm-2-0",
        "model": "DeepSeek LLM 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 68,
        "url": "https://benchlm.ai/models/deepseek-llm-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
        "score": 58.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 42,
        "slug": "nemotron-3-super-100b",
        "canonicalModelKey": "nemotron-3-super-100b",
        "model": "Nemotron 3 Super 100B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 78,
        "url": "https://benchlm.ai/models/nemotron-3-super-100b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
        "score": 57,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 43,
        "slug": "claude-4-1-opus",
        "canonicalModelKey": "claude-4-1-opus",
        "model": "Claude 4.1 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 67,
        "url": "https://benchlm.ai/models/claude-4-1-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
        "score": 55.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 44,
        "slug": "mimo-v2-flash",
        "canonicalModelKey": "mimo-v2-flash",
        "model": "MiMo-V2-Flash",
        "creator": "Xiaomi",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 59,
        "rankingEligible": true,
        "overallRank": 54,
        "url": "https://benchlm.ai/models/mimo-v2-flash",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
        "score": 55.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 45,
        "slug": "grok-4",
        "canonicalModelKey": "grok-4",
        "model": "Grok 4",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 50,
        "url": "https://benchlm.ai/models/grok-4",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
        "score": 55.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 46,
        "slug": "o3",
        "canonicalModelKey": "o3",
        "model": "o3",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 61,
        "url": "https://benchlm.ai/models/o3",
        "markdownUrl": "https://benchlm.ai/md/models/o3.md",
        "score": 55.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 47,
        "slug": "deepseek-v3-2-thinking",
        "canonicalModelKey": "deepseek-v3-2-thinking",
        "model": "DeepSeek V3.2 (Thinking)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 60,
        "rankingEligible": true,
        "overallRank": 53,
        "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
        "score": 55,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 48,
        "slug": "claude-4-sonnet",
        "canonicalModelKey": "claude-4-sonnet",
        "model": "Claude 4 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 70,
        "url": "https://benchlm.ai/models/claude-4-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
        "score": 54.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 49,
        "slug": "kimi-k2-5",
        "canonicalModelKey": "kimi-k2-5",
        "model": "Kimi K2.5",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 47,
        "url": "https://benchlm.ai/models/kimi-k2-5",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
        "score": 54.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 39,
        "scoreConfidence": 4
      },
      {
        "rank": 50,
        "slug": "gemini-1-5-pro",
        "canonicalModelKey": "gemini-1-5-pro",
        "model": "Gemini 1.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 35,
        "rankingEligible": true,
        "overallRank": 89,
        "url": "https://benchlm.ai/models/gemini-1-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
        "score": 52.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 51,
        "slug": "mistral-large-3",
        "canonicalModelKey": "mistral-large-3",
        "model": "Mistral Large 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 48,
        "rankingEligible": true,
        "overallRank": 74,
        "url": "https://benchlm.ai/models/mistral-large-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
        "score": 52,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 52,
        "slug": "llama-3-1-405b",
        "canonicalModelKey": "llama-3-1-405b",
        "model": "Llama 3.1 405B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 83,
        "url": "https://benchlm.ai/models/llama-3-1-405b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
        "score": 50.3,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 53,
        "slug": "claude-3-5-sonnet",
        "canonicalModelKey": "claude-3-5-sonnet",
        "model": "Claude 3.5 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 84,
        "url": "https://benchlm.ai/models/claude-3-5-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
        "score": 49.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 54,
        "slug": "claude-4-1-opus-thinking",
        "canonicalModelKey": "claude-4-1-opus-thinking",
        "model": "Claude 4.1 Opus Thinking",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 80,
        "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
        "score": 49.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 55,
        "slug": "o4-mini-high",
        "canonicalModelKey": "o4-mini-high",
        "model": "o4-mini (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 79,
        "url": "https://benchlm.ai/models/o4-mini-high",
        "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
        "score": 48.1,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 56,
        "slug": "deepseek-v3-2",
        "canonicalModelKey": "deepseek-v3-2",
        "model": "DeepSeek V3.2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 59,
        "url": "https://benchlm.ai/models/deepseek-v3-2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
        "score": 47.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 57,
        "slug": "gpt-4o",
        "canonicalModelKey": "gpt-4o",
        "model": "GPT-4o",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": true,
        "overallRank": 81,
        "url": "https://benchlm.ai/models/gpt-4o",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
        "score": 46.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 58,
        "slug": "mistral-large-2",
        "canonicalModelKey": "mistral-large-2",
        "model": "Mistral Large 2",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 38,
        "rankingEligible": true,
        "overallRank": 87,
        "url": "https://benchlm.ai/models/mistral-large-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
        "score": 44.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 59,
        "slug": "gemini-2-5-flash",
        "canonicalModelKey": "gemini-2-5-flash",
        "model": "Gemini 2.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 37,
        "rankingEligible": true,
        "overallRank": 88,
        "url": "https://benchlm.ai/models/gemini-2-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
        "score": 44,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 60,
        "slug": "grok-code-fast-1",
        "canonicalModelKey": "grok-code-fast-1",
        "model": "Grok Code Fast 1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 85,
        "url": "https://benchlm.ai/models/grok-code-fast-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
        "score": 43.1,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 61,
        "slug": "claude-3-opus",
        "canonicalModelKey": "claude-3-opus",
        "model": "Claude 3 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 92,
        "url": "https://benchlm.ai/models/claude-3-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
        "score": 43,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 62,
        "slug": "llama-4-scout",
        "canonicalModelKey": "llama-4-scout",
        "model": "Llama 4 Scout",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "10M",
        "contextWindowTokens": 10000000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 103,
        "url": "https://benchlm.ai/models/llama-4-scout",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
        "score": 41.1,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 63,
        "slug": "llama-3-70b",
        "canonicalModelKey": "llama-3-70b",
        "model": "Llama 3 70B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 105,
        "url": "https://benchlm.ai/models/llama-3-70b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
        "score": 39.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 64,
        "slug": "gpt-4-turbo",
        "canonicalModelKey": "gpt-4-turbo",
        "model": "GPT-4 Turbo",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 107,
        "url": "https://benchlm.ai/models/gpt-4-turbo",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
        "score": 38.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 65,
        "slug": "claude-3-haiku",
        "canonicalModelKey": "claude-3-haiku",
        "model": "Claude 3 Haiku",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 111,
        "url": "https://benchlm.ai/models/claude-3-haiku",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
        "score": 37.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 66,
        "slug": "qwen3-235b-2507-reasoning",
        "canonicalModelKey": "qwen3-235b-2507-reasoning",
        "model": "Qwen3 235B 2507 (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 76,
        "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
        "score": 35.9,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 67,
        "slug": "o1-pro",
        "canonicalModelKey": "o1-pro",
        "model": "o1-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 28,
        "rankingEligible": true,
        "overallRank": 99,
        "url": "https://benchlm.ai/models/o1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o1-pro.md",
        "score": 33.5,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 68,
        "slug": "llama-4-maverick",
        "canonicalModelKey": "llama-4-maverick",
        "model": "Llama 4 Maverick",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 17,
        "rankingEligible": true,
        "overallRank": 117,
        "url": "https://benchlm.ai/models/llama-4-maverick",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
        "score": 32.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 69,
        "slug": "mistral-8x7b",
        "canonicalModelKey": "mistral-8x7b",
        "model": "Mistral 8x7B",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 110,
        "url": "https://benchlm.ai/models/mistral-8x7b",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
        "score": 32.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 70,
        "slug": "gpt-oss-120b",
        "canonicalModelKey": "gpt-oss-120b",
        "model": "GPT-OSS 120B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 91,
        "url": "https://benchlm.ai/models/gpt-oss-120b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
        "score": 30.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 71,
        "slug": "moonshot-v1",
        "canonicalModelKey": "moonshot-v1",
        "model": "Moonshot v1",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 114,
        "url": "https://benchlm.ai/models/moonshot-v1",
        "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
        "score": 30.1,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 72,
        "slug": "z-1",
        "canonicalModelKey": "z-1",
        "model": "Z-1",
        "creator": "Z",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 109,
        "url": "https://benchlm.ai/models/z-1",
        "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
        "score": 29.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 73,
        "slug": "grok-3-beta",
        "canonicalModelKey": "grok-3-beta",
        "model": "Grok 3 [Beta]",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 30,
        "rankingEligible": true,
        "overallRank": 97,
        "url": "https://benchlm.ai/models/grok-3-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
        "score": 28.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 74,
        "slug": "nemotron-3-nano-30b",
        "canonicalModelKey": "nemotron-3-nano-30b",
        "model": "Nemotron 3 Nano 30B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 104,
        "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
        "score": 28.1,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 75,
        "slug": "gemini-1-0-pro",
        "canonicalModelKey": "gemini-1-0-pro",
        "model": "Gemini 1.0 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 108,
        "url": "https://benchlm.ai/models/gemini-1-0-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
        "score": 27.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 76,
        "slug": "deepseek-v3-1-reasoning",
        "canonicalModelKey": "deepseek-v3-1-reasoning",
        "model": "DeepSeek V3.1 (Reasoning)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 29,
        "rankingEligible": true,
        "overallRank": 98,
        "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
        "score": 26.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 77,
        "slug": "nemotron-ultra-253b",
        "canonicalModelKey": "nemotron-ultra-253b",
        "model": "Nemotron Ultra 253B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 115,
        "url": "https://benchlm.ai/models/nemotron-ultra-253b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
        "score": 26.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 78,
        "slug": "qwen3-235b-2507",
        "canonicalModelKey": "qwen3-235b-2507",
        "model": "Qwen3 235B 2507",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 95,
        "url": "https://benchlm.ai/models/qwen3-235b-2507",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
        "score": 25.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 79,
        "slug": "nemotron-4-15b",
        "canonicalModelKey": "nemotron-4-15b",
        "model": "Nemotron-4 15B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 113,
        "url": "https://benchlm.ai/models/nemotron-4-15b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
        "score": 24.4,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 80,
        "slug": "deepseek-r1",
        "canonicalModelKey": "deepseek-r1",
        "model": "DeepSeek-R1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 94,
        "url": "https://benchlm.ai/models/deepseek-r1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
        "score": 23.5,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 81,
        "slug": "gpt-4o-mini",
        "canonicalModelKey": "gpt-4o-mini",
        "model": "GPT-4o mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 71,
        "url": "https://benchlm.ai/models/gpt-4o-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o-mini.md",
        "score": 20.6,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 82,
        "slug": "gemma-3-27b",
        "canonicalModelKey": "gemma-3-27b",
        "model": "Gemma 3 27B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 118,
        "url": "https://benchlm.ai/models/gemma-3-27b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
        "score": 19.9,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 83,
        "slug": "glm-4-5",
        "canonicalModelKey": "glm-4-5",
        "model": "GLM-4.5",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 102,
        "url": "https://benchlm.ai/models/glm-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
        "score": 19.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 84,
        "slug": "nova-pro",
        "canonicalModelKey": "nova-pro",
        "model": "Nova Pro",
        "creator": "Amazon",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 10,
        "rankingEligible": true,
        "overallRank": 121,
        "url": "https://benchlm.ai/models/nova-pro",
        "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
        "score": 18,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 85,
        "slug": "deepseek-v3-1",
        "canonicalModelKey": "deepseek-v3-1",
        "model": "DeepSeek V3.1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 106,
        "url": "https://benchlm.ai/models/deepseek-v3-1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
        "score": 15.3,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 86,
        "slug": "glm-4-5-air",
        "canonicalModelKey": "glm-4-5-air",
        "model": "GLM-4.5-Air",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 18,
        "rankingEligible": true,
        "overallRank": 116,
        "url": "https://benchlm.ai/models/glm-4-5-air",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
        "score": 14.8,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 87,
        "slug": "llama-4-behemoth",
        "canonicalModelKey": "llama-4-behemoth",
        "model": "Llama 4 Behemoth",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": true,
        "overallRank": 120,
        "url": "https://benchlm.ai/models/llama-4-behemoth",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
        "score": 14.2,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 88,
        "slug": "gpt-oss-20b",
        "canonicalModelKey": "gpt-oss-20b",
        "model": "GPT-OSS 20B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 119,
        "url": "https://benchlm.ai/models/gpt-oss-20b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
        "score": 11.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 89,
        "slug": "mistral-7b-v0-3",
        "canonicalModelKey": "mistral-7b-v0-3",
        "model": "Mistral 7B v0.3",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 4,
        "rankingEligible": true,
        "overallRank": 122,
        "url": "https://benchlm.ai/models/mistral-7b-v0-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
        "score": 2.7,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 90,
        "slug": "dbrx-instruct",
        "canonicalModelKey": "dbrx-instruct",
        "model": "DBRX Instruct",
        "creator": "Databricks",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 96,
        "url": "https://benchlm.ai/models/dbrx-instruct",
        "markdownUrl": "https://benchlm.ai/md/models/dbrx-instruct.md",
        "score": 0,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 91,
        "slug": "phi-4",
        "canonicalModelKey": "phi-4",
        "model": "Phi-4",
        "creator": "Microsoft",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "16K",
        "contextWindowTokens": 16000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 100,
        "url": "https://benchlm.ai/models/phi-4",
        "markdownUrl": "https://benchlm.ai/md/models/phi-4.md",
        "score": 0,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 92,
        "slug": "mixtral-8x22b-instruct-v0-1",
        "canonicalModelKey": "mixtral-8x22b-instruct-v0-1",
        "model": "Mixtral 8x22B Instruct v0.1",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "64K",
        "contextWindowTokens": 64000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 112,
        "url": "https://benchlm.ai/models/mixtral-8x22b-instruct-v0-1",
        "markdownUrl": "https://benchlm.ai/md/models/mixtral-8x22b-instruct-v0-1.md",
        "score": 0,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 93,
        "slug": "mistral-8x7b-v0-2",
        "canonicalModelKey": "mistral-8x7b-v0-2",
        "model": "Mistral 8x7B v0.2",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 1,
        "rankingEligible": true,
        "overallRank": 123,
        "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
        "score": 0,
        "category": "reasoning",
        "categoryLabel": "Reasoning",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      }
    ],
    "multimodalGrounded": [
      {
        "rank": 1,
        "slug": "gemini-3-pro-deep-think",
        "canonicalModelKey": "gemini-3-pro-deep-think",
        "model": "Gemini 3 Pro Deep Think",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 8,
        "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
        "score": 100,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 2,
        "slug": "claude-mythos-5",
        "canonicalModelKey": "claude-mythos-5",
        "model": "Claude Mythos 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 99,
        "rankingEligible": true,
        "overallRank": 1,
        "url": "https://benchlm.ai/models/claude-mythos-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-mythos-5.md",
        "score": 98.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 3,
        "slug": "grok-4-1",
        "canonicalModelKey": "grok-4-1",
        "model": "Grok 4.1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 9,
        "url": "https://benchlm.ai/models/grok-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
        "score": 97.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 4,
        "slug": "gpt-5-1",
        "canonicalModelKey": "gpt-5-1",
        "model": "GPT-5.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 27,
        "url": "https://benchlm.ai/models/gpt-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
        "score": 96.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 5,
        "slug": "gpt-5-3-codex",
        "canonicalModelKey": "gpt-5-3-codex",
        "model": "GPT-5.3 Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 85,
        "rankingEligible": true,
        "overallRank": 15,
        "url": "https://benchlm.ai/models/gpt-5-3-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
        "score": 95.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 6,
        "slug": "claude-sonnet-4-5",
        "canonicalModelKey": "claude-sonnet-4-5",
        "model": "Claude Sonnet 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 64,
        "rankingEligible": true,
        "overallRank": 46,
        "url": "https://benchlm.ai/models/claude-sonnet-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
        "score": 94.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 7,
        "slug": "gpt-5-high",
        "canonicalModelKey": "gpt-5-high",
        "model": "GPT-5 (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 29,
        "url": "https://benchlm.ai/models/gpt-5-high",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
        "score": 93.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 8,
        "slug": "gpt-5-medium",
        "canonicalModelKey": "gpt-5-medium",
        "model": "GPT-5 (medium)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 70,
        "rankingEligible": true,
        "overallRank": 37,
        "url": "https://benchlm.ai/models/gpt-5-medium",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
        "score": 90.6,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 9,
        "slug": "gpt-5-1-codex-max",
        "canonicalModelKey": "gpt-5-1-codex-max",
        "model": "GPT-5.1-Codex-Max",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 32,
        "url": "https://benchlm.ai/models/gpt-5-1-codex-max",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex-max.md",
        "score": 90.1,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 10,
        "slug": "grok-4-1-fast",
        "canonicalModelKey": "grok-4-1-fast",
        "model": "Grok 4.1 Fast",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 41,
        "url": "https://benchlm.ai/models/grok-4-1-fast",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
        "score": 89.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 11,
        "slug": "gpt-5-2-codex",
        "canonicalModelKey": "gpt-5-2-codex",
        "model": "GPT-5.2-Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 30,
        "url": "https://benchlm.ai/models/gpt-5-2-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
        "score": 89,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 12,
        "slug": "claude-sonnet-4-6",
        "canonicalModelKey": "claude-sonnet-4-6",
        "model": "Claude Sonnet 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 18,
        "url": "https://benchlm.ai/models/claude-sonnet-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
        "score": 86.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 13,
        "slug": "gemini-2-5-pro",
        "canonicalModelKey": "gemini-2-5-pro",
        "model": "Gemini 2.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 49,
        "url": "https://benchlm.ai/models/gemini-2-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
        "score": 85.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 14,
        "slug": "gemini-3-1-pro",
        "canonicalModelKey": "gemini-3-1-pro",
        "model": "Gemini 3.1 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 4,
        "url": "https://benchlm.ai/models/gemini-3-1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
        "score": 84,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 15,
        "slug": "gpt-5-2",
        "canonicalModelKey": "gpt-5-2",
        "model": "GPT-5.2",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 78,
        "rankingEligible": true,
        "overallRank": 25,
        "url": "https://benchlm.ai/models/gpt-5-2",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
        "score": 81.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 2
      },
      {
        "rank": 16,
        "slug": "gemini-3-pro",
        "canonicalModelKey": "gemini-3-pro",
        "model": "Gemini 3 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 80,
        "rankingEligible": true,
        "overallRank": 22,
        "url": "https://benchlm.ai/models/gemini-3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
        "score": 81.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 17,
        "slug": "gemini-3-5-flash",
        "canonicalModelKey": "gemini-3-5-flash",
        "model": "Gemini 3.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 13,
        "url": "https://benchlm.ai/models/gemini-3-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-5-flash.md",
        "score": 80,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 20,
        "scoreConfidence": 3
      },
      {
        "rank": 18,
        "slug": "claude-fable",
        "canonicalModelKey": "claude-fable-5",
        "model": "Claude Fable 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 97,
        "rankingEligible": true,
        "overallRank": 2,
        "url": "https://benchlm.ai/models/claude-fable",
        "markdownUrl": "https://benchlm.ai/md/models/claude-fable.md",
        "score": 79,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 19,
        "slug": "muse-spark",
        "canonicalModelKey": "muse-spark",
        "model": "Muse Spark",
        "creator": "Meta",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 80,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/muse-spark",
        "markdownUrl": "https://benchlm.ai/md/models/muse-spark.md",
        "score": 78.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 21,
        "scoreConfidence": 3
      },
      {
        "rank": 20,
        "slug": "claude-4-1-opus",
        "canonicalModelKey": "claude-4-1-opus",
        "model": "Claude 4.1 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 67,
        "url": "https://benchlm.ai/models/claude-4-1-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
        "score": 77.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 21,
        "slug": "claude-opus-4-6",
        "canonicalModelKey": "claude-opus-4-6",
        "model": "Claude Opus 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 12,
        "url": "https://benchlm.ai/models/claude-opus-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
        "score": 76.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 27,
        "scoreConfidence": 4
      },
      {
        "rank": 22,
        "slug": "qwen3-7-plus",
        "canonicalModelKey": "qwen3-7-plus",
        "model": "Qwen3.7 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 11,
        "url": "https://benchlm.ai/models/qwen3-7-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-plus.md",
        "score": 76,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 50,
        "scoreConfidence": 4
      },
      {
        "rank": 23,
        "slug": "claude-4-sonnet",
        "canonicalModelKey": "claude-4-sonnet",
        "model": "Claude 4 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 70,
        "url": "https://benchlm.ai/models/claude-4-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
        "score": 75.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 24,
        "slug": "claude-haiku-4-5",
        "canonicalModelKey": "claude-haiku-4-5",
        "model": "Claude Haiku 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 60,
        "url": "https://benchlm.ai/models/claude-haiku-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
        "score": 73.6,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 25,
        "slug": "grok-4",
        "canonicalModelKey": "grok-4",
        "model": "Grok 4",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 50,
        "url": "https://benchlm.ai/models/grok-4",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
        "score": 73,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 26,
        "slug": "glm-5-reasoning",
        "canonicalModelKey": "glm-5-reasoning",
        "model": "GLM-5 (Reasoning)",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 24,
        "url": "https://benchlm.ai/models/glm-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
        "score": 72.6,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 27,
        "slug": "gemini-3-flash",
        "canonicalModelKey": "gemini-3-flash",
        "model": "Gemini 3 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 63,
        "url": "https://benchlm.ai/models/gemini-3-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
        "score": 72,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 28,
        "slug": "qwen3-6-plus",
        "canonicalModelKey": "qwen3-6-plus",
        "model": "Qwen3.6 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 66,
        "rankingEligible": true,
        "overallRank": 43,
        "url": "https://benchlm.ai/models/qwen3-6-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-plus.md",
        "score": 71.6,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 38,
        "scoreConfidence": 4
      },
      {
        "rank": 29,
        "slug": "kimi-2-6",
        "canonicalModelKey": "kimi-2-6",
        "model": "Kimi K2.6",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 81,
        "rankingEligible": true,
        "overallRank": 21,
        "url": "https://benchlm.ai/models/kimi-2-6",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-2-6.md",
        "score": 71.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 28,
        "scoreConfidence": 2
      },
      {
        "rank": 30,
        "slug": "mimo-v2-5",
        "canonicalModelKey": "mimo-v2-5",
        "model": "MiMo-V2.5",
        "creator": "Xiaomi",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 71,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/mimo-v2-5",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-5.md",
        "score": 70,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 31,
        "slug": "mimo-v2-flash",
        "canonicalModelKey": "mimo-v2-flash",
        "model": "MiMo-V2-Flash",
        "creator": "Xiaomi",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 59,
        "rankingEligible": true,
        "overallRank": 54,
        "url": "https://benchlm.ai/models/mimo-v2-flash",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
        "score": 68.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 32,
        "slug": "kimi-k2-5-reasoning",
        "canonicalModelKey": "kimi-k2-5-reasoning",
        "model": "Kimi K2.5 (Reasoning)",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 31,
        "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
        "score": 67.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 33,
        "slug": "mistral-large-3",
        "canonicalModelKey": "mistral-large-3",
        "model": "Mistral Large 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 48,
        "rankingEligible": true,
        "overallRank": 74,
        "url": "https://benchlm.ai/models/mistral-large-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
        "score": 67.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 34,
        "slug": "o1-preview",
        "canonicalModelKey": "o1-preview",
        "model": "o1-preview",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 20,
        "url": "https://benchlm.ai/models/o1-preview",
        "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
        "score": 67.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 35,
        "slug": "claude-3-5-sonnet",
        "canonicalModelKey": "claude-3-5-sonnet",
        "model": "Claude 3.5 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 84,
        "url": "https://benchlm.ai/models/claude-3-5-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
        "score": 66.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 36,
        "slug": "claude-opus-4-8",
        "canonicalModelKey": "claude-opus-4-8",
        "model": "Claude Opus 4.8",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 93,
        "rankingEligible": true,
        "overallRank": 3,
        "url": "https://benchlm.ai/models/claude-opus-4-8",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-8.md",
        "score": 66.4,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 37,
        "slug": "claude-4-1-opus-thinking",
        "canonicalModelKey": "claude-4-1-opus-thinking",
        "model": "Claude 4.1 Opus Thinking",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 80,
        "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
        "score": 65.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 38,
        "slug": "o3-mini",
        "canonicalModelKey": "o3-mini",
        "model": "o3-mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 64,
        "url": "https://benchlm.ai/models/o3-mini",
        "markdownUrl": "https://benchlm.ai/md/models/o3-mini.md",
        "score": 65.6,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 5,
        "scoreConfidence": 1
      },
      {
        "rank": 39,
        "slug": "gemini-1-5-pro",
        "canonicalModelKey": "gemini-1-5-pro",
        "model": "Gemini 1.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 35,
        "rankingEligible": true,
        "overallRank": 89,
        "url": "https://benchlm.ai/models/gemini-1-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
        "score": 65.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 40,
        "slug": "qwen3-5-122b-a10b",
        "canonicalModelKey": "qwen3-5-122b-a10b",
        "model": "Qwen3.5-122B-A10B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 48,
        "url": "https://benchlm.ai/models/qwen3-5-122b-a10b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-122b-a10b.md",
        "score": 65.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 41,
        "slug": "qwen3-5-397b",
        "canonicalModelKey": "qwen3-5-397b",
        "model": "Qwen3.5 397B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 62,
        "rankingEligible": true,
        "overallRank": 51,
        "url": "https://benchlm.ai/models/qwen3-5-397b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
        "score": 65.1,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 36,
        "scoreConfidence": 4
      },
      {
        "rank": 42,
        "slug": "qwen3-5-27b",
        "canonicalModelKey": "qwen3-5-27b",
        "model": "Qwen3.5-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 61,
        "rankingEligible": true,
        "overallRank": 52,
        "url": "https://benchlm.ai/models/qwen3-5-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-27b.md",
        "score": 65.1,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 43,
        "slug": "qwen3-6-27b",
        "canonicalModelKey": "qwen3-6-27b",
        "model": "Qwen3.6-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 72,
        "rankingEligible": true,
        "overallRank": 34,
        "url": "https://benchlm.ai/models/qwen3-6-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-27b.md",
        "score": 65,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 37,
        "scoreConfidence": 2
      },
      {
        "rank": 44,
        "slug": "o3-pro",
        "canonicalModelKey": "o3-pro",
        "model": "o3-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 57,
        "url": "https://benchlm.ai/models/o3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
        "score": 64.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 45,
        "slug": "qwen3-6-35b-a3b",
        "canonicalModelKey": "qwen3-6-35b-a3b",
        "model": "Qwen3.6-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 65,
        "rankingEligible": true,
        "overallRank": 45,
        "url": "https://benchlm.ai/models/qwen3-6-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-35b-a3b.md",
        "score": 64,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 40,
        "scoreConfidence": 2
      },
      {
        "rank": 46,
        "slug": "gpt-4-1",
        "canonicalModelKey": "gpt-4-1",
        "model": "GPT-4.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 56,
        "url": "https://benchlm.ai/models/gpt-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1.md",
        "score": 63.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 47,
        "slug": "qwen3-5-35b-a3b",
        "canonicalModelKey": "qwen3-5-35b-a3b",
        "model": "Qwen3.5-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 62,
        "url": "https://benchlm.ai/models/qwen3-5-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-35b-a3b.md",
        "score": 63.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 48,
        "slug": "kimi-k2-5",
        "canonicalModelKey": "kimi-k2-5",
        "model": "Kimi K2.5",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 47,
        "url": "https://benchlm.ai/models/kimi-k2-5",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
        "score": 63.1,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 39,
        "scoreConfidence": 4
      },
      {
        "rank": 49,
        "slug": "gpt-4o",
        "canonicalModelKey": "gpt-4o",
        "model": "GPT-4o",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": true,
        "overallRank": 81,
        "url": "https://benchlm.ai/models/gpt-4o",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
        "score": 62.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 50,
        "slug": "o3",
        "canonicalModelKey": "o3",
        "model": "o3",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 61,
        "url": "https://benchlm.ai/models/o3",
        "markdownUrl": "https://benchlm.ai/md/models/o3.md",
        "score": 61.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 51,
        "slug": "claude-opus-4-5",
        "canonicalModelKey": "claude-opus-4-5",
        "model": "Claude Opus 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 28,
        "url": "https://benchlm.ai/models/claude-opus-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
        "score": 61.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 41,
        "scoreConfidence": 4
      },
      {
        "rank": 52,
        "slug": "gpt-5-4",
        "canonicalModelKey": "gpt-5-4",
        "model": "GPT-5.4",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 10,
        "url": "https://benchlm.ai/models/gpt-5-4",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
        "score": 59.6,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 26,
        "scoreConfidence": 4
      },
      {
        "rank": 53,
        "slug": "deepseek-v3-2-thinking",
        "canonicalModelKey": "deepseek-v3-2-thinking",
        "model": "DeepSeek V3.2 (Thinking)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 60,
        "rankingEligible": true,
        "overallRank": 53,
        "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
        "score": 59,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 54,
        "slug": "o1",
        "canonicalModelKey": "o1",
        "model": "o1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 58,
        "url": "https://benchlm.ai/models/o1",
        "markdownUrl": "https://benchlm.ai/md/models/o1.md",
        "score": 58.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 55,
        "slug": "claude-3-opus",
        "canonicalModelKey": "claude-3-opus",
        "model": "Claude 3 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 92,
        "url": "https://benchlm.ai/models/claude-3-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
        "score": 58.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 56,
        "slug": "qwen3-5-397b-reasoning",
        "canonicalModelKey": "qwen3-5-397b-reasoning",
        "model": "Qwen3.5 397B (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 26,
        "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
        "score": 58.4,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 57,
        "slug": "gemini-3-1-flash-lite",
        "canonicalModelKey": "gemini-3-1-flash-lite",
        "model": "Gemini 3.1 Flash-Lite",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 47,
        "rankingEligible": true,
        "overallRank": 75,
        "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
        "score": 58.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 58,
        "slug": "glm-4-7",
        "canonicalModelKey": "glm-4-7",
        "model": "GLM-4.7",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 40,
        "url": "https://benchlm.ai/models/glm-4-7",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
        "score": 58.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 59,
        "slug": "gpt-5-5",
        "canonicalModelKey": "gpt-5-5",
        "model": "GPT-5.5",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 7,
        "url": "https://benchlm.ai/models/gpt-5-5",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-5.md",
        "score": 58,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 22,
        "scoreConfidence": 3
      },
      {
        "rank": 60,
        "slug": "gpt-4-1-mini",
        "canonicalModelKey": "gpt-4-1-mini",
        "model": "GPT-4.1 mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 77,
        "url": "https://benchlm.ai/models/gpt-4-1-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-mini.md",
        "score": 56.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 61,
        "slug": "glm-5",
        "canonicalModelKey": "glm-5",
        "model": "GLM-5",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 67,
        "rankingEligible": true,
        "overallRank": 42,
        "url": "https://benchlm.ai/models/glm-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
        "score": 56,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 4
      },
      {
        "rank": 62,
        "slug": "claude-3-haiku",
        "canonicalModelKey": "claude-3-haiku",
        "model": "Claude 3 Haiku",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 111,
        "url": "https://benchlm.ai/models/claude-3-haiku",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
        "score": 55.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 63,
        "slug": "gemini-1-0-pro",
        "canonicalModelKey": "gemini-1-0-pro",
        "model": "Gemini 1.0 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 108,
        "url": "https://benchlm.ai/models/gemini-1-0-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
        "score": 55.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 64,
        "slug": "qwen2-5-1m",
        "canonicalModelKey": "qwen2-5-1m",
        "model": "Qwen2.5-1M",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 69,
        "url": "https://benchlm.ai/models/qwen2-5-1m",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
        "score": 54.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 65,
        "slug": "gemini-2-5-flash",
        "canonicalModelKey": "gemini-2-5-flash",
        "model": "Gemini 2.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 37,
        "rankingEligible": true,
        "overallRank": 88,
        "url": "https://benchlm.ai/models/gemini-2-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
        "score": 54,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 66,
        "slug": "deepseekmath-v2",
        "canonicalModelKey": "deepseekmath-v2",
        "model": "DeepSeekMath V2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 73,
        "url": "https://benchlm.ai/models/deepseekmath-v2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
        "score": 53.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 67,
        "slug": "qwen2-5-72b",
        "canonicalModelKey": "qwen2-5-72b",
        "model": "Qwen2.5-72B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 72,
        "url": "https://benchlm.ai/models/qwen2-5-72b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
        "score": 51.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 68,
        "slug": "o4-mini-high",
        "canonicalModelKey": "o4-mini-high",
        "model": "o4-mini (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 79,
        "url": "https://benchlm.ai/models/o4-mini-high",
        "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
        "score": 50.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 69,
        "slug": "deepseek-v3-2",
        "canonicalModelKey": "deepseek-v3-2",
        "model": "DeepSeek V3.2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 59,
        "url": "https://benchlm.ai/models/deepseek-v3-2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
        "score": 50,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 70,
        "slug": "minimax-m3",
        "canonicalModelKey": "minimax-m3",
        "model": "MiniMax M3",
        "creator": "MiniMax",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 23,
        "url": "https://benchlm.ai/models/minimax-m3",
        "markdownUrl": "https://benchlm.ai/md/models/minimax-m3.md",
        "score": 48.1,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 15,
        "scoreConfidence": 2
      },
      {
        "rank": 71,
        "slug": "grok-4-20-beta",
        "canonicalModelKey": "grok-4-20-beta",
        "model": "Grok 4.20",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 71,
        "rankingEligible": true,
        "overallRank": 35,
        "url": "https://benchlm.ai/models/grok-4-20-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-20-beta.md",
        "score": 47.6,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 16,
        "scoreConfidence": 3
      },
      {
        "rank": 72,
        "slug": "deepseek-llm-2-0",
        "canonicalModelKey": "deepseek-llm-2-0",
        "model": "DeepSeek LLM 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 68,
        "url": "https://benchlm.ai/models/deepseek-llm-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
        "score": 47.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 73,
        "slug": "claude-opus-4-7-adaptive",
        "canonicalModelKey": "claude-opus-4-7-max",
        "model": "Claude Opus 4.7 (Adaptive)",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 84,
        "rankingEligible": true,
        "overallRank": 16,
        "url": "https://benchlm.ai/models/claude-opus-4-7-adaptive",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-7-adaptive.md",
        "score": 45,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 18,
        "scoreConfidence": 3
      },
      {
        "rank": 74,
        "slug": "llama-3-1-405b",
        "canonicalModelKey": "llama-3-1-405b",
        "model": "Llama 3.1 405B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 83,
        "url": "https://benchlm.ai/models/llama-3-1-405b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
        "score": 43.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 75,
        "slug": "gpt-4o-mini",
        "canonicalModelKey": "gpt-4o-mini",
        "model": "GPT-4o mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 71,
        "url": "https://benchlm.ai/models/gpt-4o-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o-mini.md",
        "score": 41.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 76,
        "slug": "mistral-large-2",
        "canonicalModelKey": "mistral-large-2",
        "model": "Mistral Large 2",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 38,
        "rankingEligible": true,
        "overallRank": 87,
        "url": "https://benchlm.ai/models/mistral-large-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
        "score": 41.1,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 77,
        "slug": "nemotron-3-super-100b",
        "canonicalModelKey": "nemotron-3-super-100b",
        "model": "Nemotron 3 Super 100B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 78,
        "url": "https://benchlm.ai/models/nemotron-3-super-100b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
        "score": 40,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 78,
        "slug": "gpt-4-1-nano",
        "canonicalModelKey": "gpt-4-1-nano",
        "model": "GPT-4.1 nano",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 101,
        "url": "https://benchlm.ai/models/gpt-4-1-nano",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-nano.md",
        "score": 37.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 79,
        "slug": "llama-4-scout",
        "canonicalModelKey": "llama-4-scout",
        "model": "Llama 4 Scout",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "10M",
        "contextWindowTokens": 10000000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 103,
        "url": "https://benchlm.ai/models/llama-4-scout",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
        "score": 36.4,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 80,
        "slug": "deepseek-coder-2-0",
        "canonicalModelKey": "deepseek-coder-2-0",
        "model": "DeepSeek Coder 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 66,
        "url": "https://benchlm.ai/models/deepseek-coder-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
        "score": 36.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 81,
        "slug": "llama-4-maverick",
        "canonicalModelKey": "llama-4-maverick",
        "model": "Llama 4 Maverick",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 17,
        "rankingEligible": true,
        "overallRank": 117,
        "url": "https://benchlm.ai/models/llama-4-maverick",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
        "score": 34.6,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 82,
        "slug": "command-a-plus",
        "canonicalModelKey": "command-a-plus",
        "model": "Command A+",
        "creator": "Cohere",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 39,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/command-a-plus",
        "markdownUrl": "https://benchlm.ai/md/models/command-a-plus.md",
        "score": 32.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 83,
        "slug": "llama-4-behemoth",
        "canonicalModelKey": "llama-4-behemoth",
        "model": "Llama 4 Behemoth",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": true,
        "overallRank": 120,
        "url": "https://benchlm.ai/models/llama-4-behemoth",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
        "score": 31.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 84,
        "slug": "gpt-4-turbo",
        "canonicalModelKey": "gpt-4-turbo",
        "model": "GPT-4 Turbo",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 107,
        "url": "https://benchlm.ai/models/gpt-4-turbo",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
        "score": 31.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 85,
        "slug": "moonshot-v1",
        "canonicalModelKey": "moonshot-v1",
        "model": "Moonshot v1",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 114,
        "url": "https://benchlm.ai/models/moonshot-v1",
        "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
        "score": 26.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 86,
        "slug": "llama-3-70b",
        "canonicalModelKey": "llama-3-70b",
        "model": "Llama 3 70B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 105,
        "url": "https://benchlm.ai/models/llama-3-70b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
        "score": 25.8,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 87,
        "slug": "z-1",
        "canonicalModelKey": "z-1",
        "model": "Z-1",
        "creator": "Z",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 109,
        "url": "https://benchlm.ai/models/z-1",
        "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
        "score": 22.4,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 88,
        "slug": "grok-code-fast-1",
        "canonicalModelKey": "grok-code-fast-1",
        "model": "Grok Code Fast 1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 85,
        "url": "https://benchlm.ai/models/grok-code-fast-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
        "score": 21.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 89,
        "slug": "nemotron-4-15b",
        "canonicalModelKey": "nemotron-4-15b",
        "model": "Nemotron-4 15B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 113,
        "url": "https://benchlm.ai/models/nemotron-4-15b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
        "score": 20.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 90,
        "slug": "o1-pro",
        "canonicalModelKey": "o1-pro",
        "model": "o1-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 28,
        "rankingEligible": true,
        "overallRank": 99,
        "url": "https://benchlm.ai/models/o1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o1-pro.md",
        "score": 19.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 91,
        "slug": "gpt-oss-120b",
        "canonicalModelKey": "gpt-oss-120b",
        "model": "GPT-OSS 120B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 91,
        "url": "https://benchlm.ai/models/gpt-oss-120b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
        "score": 18.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 92,
        "slug": "phi-4",
        "canonicalModelKey": "phi-4",
        "model": "Phi-4",
        "creator": "Microsoft",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "16K",
        "contextWindowTokens": 16000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 100,
        "url": "https://benchlm.ai/models/phi-4",
        "markdownUrl": "https://benchlm.ai/md/models/phi-4.md",
        "score": 18.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 93,
        "slug": "mistral-8x7b",
        "canonicalModelKey": "mistral-8x7b",
        "model": "Mistral 8x7B",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 110,
        "url": "https://benchlm.ai/models/mistral-8x7b",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
        "score": 18.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 94,
        "slug": "deepseek-r1",
        "canonicalModelKey": "deepseek-r1",
        "model": "DeepSeek-R1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 94,
        "url": "https://benchlm.ai/models/deepseek-r1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
        "score": 17,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 95,
        "slug": "nemotron-3-nano-30b",
        "canonicalModelKey": "nemotron-3-nano-30b",
        "model": "Nemotron 3 Nano 30B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 104,
        "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
        "score": 12.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 96,
        "slug": "nemotron-ultra-253b",
        "canonicalModelKey": "nemotron-ultra-253b",
        "model": "Nemotron Ultra 253B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 115,
        "url": "https://benchlm.ai/models/nemotron-ultra-253b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
        "score": 11.4,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 97,
        "slug": "grok-3-beta",
        "canonicalModelKey": "grok-3-beta",
        "model": "Grok 3 [Beta]",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 30,
        "rankingEligible": true,
        "overallRank": 97,
        "url": "https://benchlm.ai/models/grok-3-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
        "score": 9.4,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 98,
        "slug": "qwen3-235b-2507-reasoning",
        "canonicalModelKey": "qwen3-235b-2507-reasoning",
        "model": "Qwen3 235B 2507 (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 76,
        "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
        "score": 7.3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 99,
        "slug": "gemma-3-27b",
        "canonicalModelKey": "gemma-3-27b",
        "model": "Gemma 3 27B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 118,
        "url": "https://benchlm.ai/models/gemma-3-27b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
        "score": 6.9,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 100,
        "slug": "qwen3-235b-2507",
        "canonicalModelKey": "qwen3-235b-2507",
        "model": "Qwen3 235B 2507",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 95,
        "url": "https://benchlm.ai/models/qwen3-235b-2507",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
        "score": 6.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 101,
        "slug": "deepseek-v3-1-reasoning",
        "canonicalModelKey": "deepseek-v3-1-reasoning",
        "model": "DeepSeek V3.1 (Reasoning)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 29,
        "rankingEligible": true,
        "overallRank": 98,
        "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
        "score": 6.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 102,
        "slug": "nova-pro",
        "canonicalModelKey": "nova-pro",
        "model": "Nova Pro",
        "creator": "Amazon",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 10,
        "rankingEligible": true,
        "overallRank": 121,
        "url": "https://benchlm.ai/models/nova-pro",
        "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
        "score": 5.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 103,
        "slug": "glm-4-5",
        "canonicalModelKey": "glm-4-5",
        "model": "GLM-4.5",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 102,
        "url": "https://benchlm.ai/models/glm-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
        "score": 5.2,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 104,
        "slug": "deepseek-v3-1",
        "canonicalModelKey": "deepseek-v3-1",
        "model": "DeepSeek V3.1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 106,
        "url": "https://benchlm.ai/models/deepseek-v3-1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
        "score": 3.7,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 105,
        "slug": "glm-4-5-air",
        "canonicalModelKey": "glm-4-5-air",
        "model": "GLM-4.5-Air",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 18,
        "rankingEligible": true,
        "overallRank": 116,
        "url": "https://benchlm.ai/models/glm-4-5-air",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
        "score": 3,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 106,
        "slug": "gpt-oss-20b",
        "canonicalModelKey": "gpt-oss-20b",
        "model": "GPT-OSS 20B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 119,
        "url": "https://benchlm.ai/models/gpt-oss-20b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
        "score": 1.5,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 107,
        "slug": "dbrx-instruct",
        "canonicalModelKey": "dbrx-instruct",
        "model": "DBRX Instruct",
        "creator": "Databricks",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 96,
        "url": "https://benchlm.ai/models/dbrx-instruct",
        "markdownUrl": "https://benchlm.ai/md/models/dbrx-instruct.md",
        "score": 0,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 108,
        "slug": "mixtral-8x22b-instruct-v0-1",
        "canonicalModelKey": "mixtral-8x22b-instruct-v0-1",
        "model": "Mixtral 8x22B Instruct v0.1",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "64K",
        "contextWindowTokens": 64000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 112,
        "url": "https://benchlm.ai/models/mixtral-8x22b-instruct-v0-1",
        "markdownUrl": "https://benchlm.ai/md/models/mixtral-8x22b-instruct-v0-1.md",
        "score": 0,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 109,
        "slug": "mistral-7b-v0-3",
        "canonicalModelKey": "mistral-7b-v0-3",
        "model": "Mistral 7B v0.3",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 4,
        "rankingEligible": true,
        "overallRank": 122,
        "url": "https://benchlm.ai/models/mistral-7b-v0-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
        "score": 0,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 110,
        "slug": "mistral-8x7b-v0-2",
        "canonicalModelKey": "mistral-8x7b-v0-2",
        "model": "Mistral 8x7B v0.2",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 1,
        "rankingEligible": true,
        "overallRank": 123,
        "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
        "score": 0,
        "category": "multimodalGrounded",
        "categoryLabel": "Multimodal & Grounded",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      }
    ],
    "knowledge": [
      {
        "rank": 1,
        "slug": "claude-opus-4-7-adaptive",
        "canonicalModelKey": "claude-opus-4-7-max",
        "model": "Claude Opus 4.7 (Adaptive)",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 84,
        "rankingEligible": true,
        "overallRank": 16,
        "url": "https://benchlm.ai/models/claude-opus-4-7-adaptive",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-7-adaptive.md",
        "score": 99.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 18,
        "scoreConfidence": 3
      },
      {
        "rank": 2,
        "slug": "claude-opus-4-8",
        "canonicalModelKey": "claude-opus-4-8",
        "model": "Claude Opus 4.8",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 93,
        "rankingEligible": true,
        "overallRank": 3,
        "url": "https://benchlm.ai/models/claude-opus-4-8",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-8.md",
        "score": 98.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 3,
        "slug": "gpt-5-4",
        "canonicalModelKey": "gpt-5-4",
        "model": "GPT-5.4",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 10,
        "url": "https://benchlm.ai/models/gpt-5-4",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
        "score": 98.5,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 26,
        "scoreConfidence": 4
      },
      {
        "rank": 4,
        "slug": "gemini-3-1-pro",
        "canonicalModelKey": "gemini-3-1-pro",
        "model": "Gemini 3.1 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 4,
        "url": "https://benchlm.ai/models/gemini-3-1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
        "score": 93.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 5,
        "slug": "grok-4-1",
        "canonicalModelKey": "grok-4-1",
        "model": "Grok 4.1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 9,
        "url": "https://benchlm.ai/models/grok-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
        "score": 93.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 6,
        "slug": "gpt-5-3-codex",
        "canonicalModelKey": "gpt-5-3-codex",
        "model": "GPT-5.3 Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 85,
        "rankingEligible": true,
        "overallRank": 15,
        "url": "https://benchlm.ai/models/gpt-5-3-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
        "score": 92,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 7,
        "slug": "gpt-5-2",
        "canonicalModelKey": "gpt-5-2",
        "model": "GPT-5.2",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 78,
        "rankingEligible": true,
        "overallRank": 25,
        "url": "https://benchlm.ai/models/gpt-5-2",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
        "score": 91.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 2
      },
      {
        "rank": 8,
        "slug": "claude-opus-4-6",
        "canonicalModelKey": "claude-opus-4-6",
        "model": "Claude Opus 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 12,
        "url": "https://benchlm.ai/models/claude-opus-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
        "score": 90.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 27,
        "scoreConfidence": 4
      },
      {
        "rank": 9,
        "slug": "gemini-3-pro-deep-think",
        "canonicalModelKey": "gemini-3-pro-deep-think",
        "model": "Gemini 3 Pro Deep Think",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 8,
        "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
        "score": 86.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 10,
        "slug": "qwen3-7-max",
        "canonicalModelKey": "qwen3-7-max",
        "model": "Qwen3.7 Max",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 5,
        "url": "https://benchlm.ai/models/qwen3-7-max",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-max.md",
        "score": 85.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 3
      },
      {
        "rank": 11,
        "slug": "glm-5-1",
        "canonicalModelKey": "glm-5-1",
        "model": "GLM-5.1",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "203K",
        "contextWindowTokens": 203000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 17,
        "url": "https://benchlm.ai/models/glm-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-1.md",
        "score": 83.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 16,
        "scoreConfidence": 3
      },
      {
        "rank": 12,
        "slug": "claude-opus-4-5",
        "canonicalModelKey": "claude-opus-4-5",
        "model": "Claude Opus 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 28,
        "url": "https://benchlm.ai/models/claude-opus-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
        "score": 83,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 41,
        "scoreConfidence": 4
      },
      {
        "rank": 13,
        "slug": "gemini-3-pro",
        "canonicalModelKey": "gemini-3-pro",
        "model": "Gemini 3 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 80,
        "rankingEligible": true,
        "overallRank": 22,
        "url": "https://benchlm.ai/models/gemini-3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
        "score": 82.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 14,
        "slug": "glm-5",
        "canonicalModelKey": "glm-5",
        "model": "GLM-5",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 67,
        "rankingEligible": true,
        "overallRank": 42,
        "url": "https://benchlm.ai/models/glm-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
        "score": 82.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 4
      },
      {
        "rank": 15,
        "slug": "claude-sonnet-4-6",
        "canonicalModelKey": "claude-sonnet-4-6",
        "model": "Claude Sonnet 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 18,
        "url": "https://benchlm.ai/models/claude-sonnet-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
        "score": 81.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 16,
        "slug": "gpt-5-1",
        "canonicalModelKey": "gpt-5-1",
        "model": "GPT-5.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 27,
        "url": "https://benchlm.ai/models/gpt-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
        "score": 81.5,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 17,
        "slug": "glm-5-reasoning",
        "canonicalModelKey": "glm-5-reasoning",
        "model": "GLM-5 (Reasoning)",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 24,
        "url": "https://benchlm.ai/models/glm-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
        "score": 81.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 18,
        "slug": "qwen3-5-122b-a10b",
        "canonicalModelKey": "qwen3-5-122b-a10b",
        "model": "Qwen3.5-122B-A10B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 48,
        "url": "https://benchlm.ai/models/qwen3-5-122b-a10b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-122b-a10b.md",
        "score": 80.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 19,
        "slug": "o1-preview",
        "canonicalModelKey": "o1-preview",
        "model": "o1-preview",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 20,
        "url": "https://benchlm.ai/models/o1-preview",
        "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
        "score": 79.8,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 20,
        "slug": "qwen3-7-plus",
        "canonicalModelKey": "qwen3-7-plus",
        "model": "Qwen3.7 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 11,
        "url": "https://benchlm.ai/models/qwen3-7-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-plus.md",
        "score": 79.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 50,
        "scoreConfidence": 4
      },
      {
        "rank": 21,
        "slug": "gpt-5-high",
        "canonicalModelKey": "gpt-5-high",
        "model": "GPT-5 (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 29,
        "url": "https://benchlm.ai/models/gpt-5-high",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
        "score": 78.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 22,
        "slug": "gpt-5-1-codex-max",
        "canonicalModelKey": "gpt-5-1-codex-max",
        "model": "GPT-5.1-Codex-Max",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 32,
        "url": "https://benchlm.ai/models/gpt-5-1-codex-max",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex-max.md",
        "score": 78.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 23,
        "slug": "qwen3-5-397b-reasoning",
        "canonicalModelKey": "qwen3-5-397b-reasoning",
        "model": "Qwen3.5 397B (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 26,
        "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
        "score": 78,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 24,
        "slug": "qwen3-5-27b",
        "canonicalModelKey": "qwen3-5-27b",
        "model": "Qwen3.5-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 61,
        "rankingEligible": true,
        "overallRank": 52,
        "url": "https://benchlm.ai/models/qwen3-5-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-27b.md",
        "score": 78,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 25,
        "slug": "gpt-5-2-codex",
        "canonicalModelKey": "gpt-5-2-codex",
        "model": "GPT-5.2-Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 30,
        "url": "https://benchlm.ai/models/gpt-5-2-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
        "score": 77.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 26,
        "slug": "deepseek-v4-pro-max",
        "canonicalModelKey": "deepseek-v4-pro-max",
        "model": "DeepSeek V4 Pro (Max)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 14,
        "url": "https://benchlm.ai/models/deepseek-v4-pro-max",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-max.md",
        "score": 76.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 24,
        "scoreConfidence": 2
      },
      {
        "rank": 27,
        "slug": "qwen3-6-plus",
        "canonicalModelKey": "qwen3-6-plus",
        "model": "Qwen3.6 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 66,
        "rankingEligible": true,
        "overallRank": 43,
        "url": "https://benchlm.ai/models/qwen3-6-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-plus.md",
        "score": 75.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 38,
        "scoreConfidence": 4
      },
      {
        "rank": 28,
        "slug": "qwen3-5-35b-a3b",
        "canonicalModelKey": "qwen3-5-35b-a3b",
        "model": "Qwen3.5-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 62,
        "url": "https://benchlm.ai/models/qwen3-5-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-35b-a3b.md",
        "score": 75.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 29,
        "slug": "grok-4-1-fast",
        "canonicalModelKey": "grok-4-1-fast",
        "model": "Grok 4.1 Fast",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 41,
        "url": "https://benchlm.ai/models/grok-4-1-fast",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
        "score": 75.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 30,
        "slug": "kimi-k2-5-reasoning",
        "canonicalModelKey": "kimi-k2-5-reasoning",
        "model": "Kimi K2.5 (Reasoning)",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 31,
        "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
        "score": 74,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 31,
        "slug": "nemotron-3-ultra",
        "canonicalModelKey": "nemotron-3-ultra-500b",
        "model": "Nemotron 3 Ultra",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 38,
        "url": "https://benchlm.ai/models/nemotron-3-ultra",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-ultra.md",
        "score": 73.8,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 18,
        "scoreConfidence": 3
      },
      {
        "rank": 32,
        "slug": "claude-sonnet-4-5",
        "canonicalModelKey": "claude-sonnet-4-5",
        "model": "Claude Sonnet 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 64,
        "rankingEligible": true,
        "overallRank": 46,
        "url": "https://benchlm.ai/models/claude-sonnet-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
        "score": 73.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 33,
        "slug": "gpt-5-medium",
        "canonicalModelKey": "gpt-5-medium",
        "model": "GPT-5 (medium)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 70,
        "rankingEligible": true,
        "overallRank": 37,
        "url": "https://benchlm.ai/models/gpt-5-medium",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
        "score": 73.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 34,
        "slug": "gemma-4-31b",
        "canonicalModelKey": "gemma-4-31b",
        "model": "Gemma 4 31B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 64,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/gemma-4-31b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-4-31b.md",
        "score": 72.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 35,
        "slug": "qwen3-5-397b",
        "canonicalModelKey": "qwen3-5-397b",
        "model": "Qwen3.5 397B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 62,
        "rankingEligible": true,
        "overallRank": 51,
        "url": "https://benchlm.ai/models/qwen3-5-397b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
        "score": 71.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 36,
        "scoreConfidence": 4
      },
      {
        "rank": 36,
        "slug": "deepseek-v4-pro-high",
        "canonicalModelKey": "deepseek-v4-pro-high",
        "model": "DeepSeek V4 Pro (High)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 19,
        "url": "https://benchlm.ai/models/deepseek-v4-pro-high",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-high.md",
        "score": 70.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 37,
        "slug": "kimi-k2-5",
        "canonicalModelKey": "kimi-k2-5",
        "model": "Kimi K2.5",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 47,
        "url": "https://benchlm.ai/models/kimi-k2-5",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
        "score": 70.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 39,
        "scoreConfidence": 4
      },
      {
        "rank": 38,
        "slug": "qwen3-6-27b",
        "canonicalModelKey": "qwen3-6-27b",
        "model": "Qwen3.6-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 72,
        "rankingEligible": true,
        "overallRank": 34,
        "url": "https://benchlm.ai/models/qwen3-6-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-27b.md",
        "score": 68.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 37,
        "scoreConfidence": 2
      },
      {
        "rank": 39,
        "slug": "deepseek-v4-flash-max",
        "canonicalModelKey": "deepseek-v4-flash-max",
        "model": "DeepSeek V4 Flash (Max)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 74,
        "rankingEligible": true,
        "overallRank": 33,
        "url": "https://benchlm.ai/models/deepseek-v4-flash-max",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-max.md",
        "score": 66.8,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 40,
        "slug": "deepseek-v3-2-thinking",
        "canonicalModelKey": "deepseek-v3-2-thinking",
        "model": "DeepSeek V3.2 (Thinking)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 60,
        "rankingEligible": true,
        "overallRank": 53,
        "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
        "score": 66.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 41,
        "slug": "o3-pro",
        "canonicalModelKey": "o3-pro",
        "model": "o3-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 57,
        "url": "https://benchlm.ai/models/o3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
        "score": 66.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 42,
        "slug": "glm-4-7",
        "canonicalModelKey": "glm-4-7",
        "model": "GLM-4.7",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 40,
        "url": "https://benchlm.ai/models/glm-4-7",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
        "score": 66.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 43,
        "slug": "qwen3-6-35b-a3b",
        "canonicalModelKey": "qwen3-6-35b-a3b",
        "model": "Qwen3.6-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 65,
        "rankingEligible": true,
        "overallRank": 45,
        "url": "https://benchlm.ai/models/qwen3-6-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-35b-a3b.md",
        "score": 65.5,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 40,
        "scoreConfidence": 2
      },
      {
        "rank": 44,
        "slug": "o3",
        "canonicalModelKey": "o3",
        "model": "o3",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 61,
        "url": "https://benchlm.ai/models/o3",
        "markdownUrl": "https://benchlm.ai/md/models/o3.md",
        "score": 65.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 45,
        "slug": "grok-4",
        "canonicalModelKey": "grok-4",
        "model": "Grok 4",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 50,
        "url": "https://benchlm.ai/models/grok-4",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
        "score": 65.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 46,
        "slug": "gemini-2-5-pro",
        "canonicalModelKey": "gemini-2-5-pro",
        "model": "Gemini 2.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 49,
        "url": "https://benchlm.ai/models/gemini-2-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
        "score": 64.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 47,
        "slug": "mai-thinking-1",
        "canonicalModelKey": "mai-thinking-1",
        "model": "MAI-Thinking-1",
        "creator": "Microsoft",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 65,
        "rankingEligible": true,
        "overallRank": 44,
        "url": "https://benchlm.ai/models/mai-thinking-1",
        "markdownUrl": "https://benchlm.ai/md/models/mai-thinking-1.md",
        "score": 64.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 48,
        "slug": "mimo-v2-flash",
        "canonicalModelKey": "mimo-v2-flash",
        "model": "MiMo-V2-Flash",
        "creator": "Xiaomi",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 59,
        "rankingEligible": true,
        "overallRank": 54,
        "url": "https://benchlm.ai/models/mimo-v2-flash",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
        "score": 63.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 49,
        "slug": "gemma-4-26b-a4b",
        "canonicalModelKey": "gemma-4-26b-a4b",
        "model": "Gemma 4 26B A4B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 54,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/gemma-4-26b-a4b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-4-26b-a4b.md",
        "score": 62.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 50,
        "slug": "deepseek-v4-flash-high",
        "canonicalModelKey": "deepseek-v4-flash-high",
        "model": "DeepSeek V4 Flash (High)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 71,
        "rankingEligible": true,
        "overallRank": 36,
        "url": "https://benchlm.ai/models/deepseek-v4-flash-high",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-high.md",
        "score": 62.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 23,
        "scoreConfidence": 2
      },
      {
        "rank": 51,
        "slug": "deepseek-v3-2",
        "canonicalModelKey": "deepseek-v3-2",
        "model": "DeepSeek V3.2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 59,
        "url": "https://benchlm.ai/models/deepseek-v3-2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
        "score": 59.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 52,
        "slug": "o4-mini-high",
        "canonicalModelKey": "o4-mini-high",
        "model": "o4-mini (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 79,
        "url": "https://benchlm.ai/models/o4-mini-high",
        "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
        "score": 58,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 53,
        "slug": "deepseekmath-v2",
        "canonicalModelKey": "deepseekmath-v2",
        "model": "DeepSeekMath V2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 73,
        "url": "https://benchlm.ai/models/deepseekmath-v2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
        "score": 57.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 54,
        "slug": "qwen2-5-1m",
        "canonicalModelKey": "qwen2-5-1m",
        "model": "Qwen2.5-1M",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 69,
        "url": "https://benchlm.ai/models/qwen2-5-1m",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
        "score": 56.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 55,
        "slug": "qwen2-5-72b",
        "canonicalModelKey": "qwen2-5-72b",
        "model": "Qwen2.5-72B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 72,
        "url": "https://benchlm.ai/models/qwen2-5-72b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
        "score": 56.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 56,
        "slug": "kimi-k2",
        "canonicalModelKey": "kimi-k2",
        "model": "Kimi K2",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 41,
        "rankingEligible": true,
        "overallRank": 82,
        "url": "https://benchlm.ai/models/kimi-k2",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2.md",
        "score": 56,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 57,
        "slug": "deepseek-coder-2-0",
        "canonicalModelKey": "deepseek-coder-2-0",
        "model": "DeepSeek Coder 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 66,
        "url": "https://benchlm.ai/models/deepseek-coder-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
        "score": 55.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 58,
        "slug": "deepseek-llm-2-0",
        "canonicalModelKey": "deepseek-llm-2-0",
        "model": "DeepSeek LLM 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 68,
        "url": "https://benchlm.ai/models/deepseek-llm-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
        "score": 53.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 59,
        "slug": "claude-4-1-opus",
        "canonicalModelKey": "claude-4-1-opus",
        "model": "Claude 4.1 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 67,
        "url": "https://benchlm.ai/models/claude-4-1-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
        "score": 51.8,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 60,
        "slug": "deepseek-v4-pro",
        "canonicalModelKey": "deepseek-v4-pro",
        "model": "DeepSeek V4 Pro",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 39,
        "url": "https://benchlm.ai/models/deepseek-v4-pro",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro.md",
        "score": 50.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 21,
        "scoreConfidence": 2
      },
      {
        "rank": 61,
        "slug": "claude-4-sonnet",
        "canonicalModelKey": "claude-4-sonnet",
        "model": "Claude 4 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 70,
        "url": "https://benchlm.ai/models/claude-4-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
        "score": 50.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 62,
        "slug": "deepseek-v4-pro-base",
        "canonicalModelKey": "deepseek-v4-pro-base",
        "model": "DeepSeek V4 Pro Base",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 41,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/deepseek-v4-pro-base",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-base.md",
        "score": 49.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 24,
        "scoreConfidence": 2
      },
      {
        "rank": 63,
        "slug": "grok-3-mini",
        "canonicalModelKey": "grok-3-mini",
        "model": "Grok 3 Mini",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 41,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/grok-3-mini",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-mini.md",
        "score": 48.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 64,
        "slug": "claude-haiku-4-5",
        "canonicalModelKey": "claude-haiku-4-5",
        "model": "Claude Haiku 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 60,
        "url": "https://benchlm.ai/models/claude-haiku-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
        "score": 48.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 65,
        "slug": "mistral-large-2",
        "canonicalModelKey": "mistral-large-2",
        "model": "Mistral Large 2",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 38,
        "rankingEligible": true,
        "overallRank": 87,
        "url": "https://benchlm.ai/models/mistral-large-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
        "score": 47.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 66,
        "slug": "gemini-3-flash",
        "canonicalModelKey": "gemini-3-flash",
        "model": "Gemini 3 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 63,
        "url": "https://benchlm.ai/models/gemini-3-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
        "score": 46.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 67,
        "slug": "llama-3-1-405b",
        "canonicalModelKey": "llama-3-1-405b",
        "model": "Llama 3.1 405B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 83,
        "url": "https://benchlm.ai/models/llama-3-1-405b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
        "score": 45.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 68,
        "slug": "deepseek-v4-flash",
        "canonicalModelKey": "deepseek-v4-flash",
        "model": "DeepSeek V4 Flash",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 55,
        "url": "https://benchlm.ai/models/deepseek-v4-flash",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash.md",
        "score": 44.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 21,
        "scoreConfidence": 2
      },
      {
        "rank": 69,
        "slug": "gpt-oss-120b",
        "canonicalModelKey": "gpt-oss-120b",
        "model": "GPT-OSS 120B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 91,
        "url": "https://benchlm.ai/models/gpt-oss-120b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
        "score": 44.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 70,
        "slug": "nemotron-3-super-100b",
        "canonicalModelKey": "nemotron-3-super-100b",
        "model": "Nemotron 3 Super 100B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 78,
        "url": "https://benchlm.ai/models/nemotron-3-super-100b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
        "score": 44,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 71,
        "slug": "qwen3-235b-2507-reasoning",
        "canonicalModelKey": "qwen3-235b-2507-reasoning",
        "model": "Qwen3 235B 2507 (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 76,
        "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
        "score": 43.5,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 72,
        "slug": "deepseek-v3",
        "canonicalModelKey": "deepseek-v3",
        "model": "DeepSeek V3",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 90,
        "url": "https://benchlm.ai/models/deepseek-v3",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3.md",
        "score": 42.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 73,
        "slug": "mistral-large-3",
        "canonicalModelKey": "mistral-large-3",
        "model": "Mistral Large 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 48,
        "rankingEligible": true,
        "overallRank": 74,
        "url": "https://benchlm.ai/models/mistral-large-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
        "score": 41.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 74,
        "slug": "qwen3-235b-2507",
        "canonicalModelKey": "qwen3-235b-2507",
        "model": "Qwen3 235B 2507",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 95,
        "url": "https://benchlm.ai/models/qwen3-235b-2507",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
        "score": 40,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 75,
        "slug": "claude-3-5-sonnet",
        "canonicalModelKey": "claude-3-5-sonnet",
        "model": "Claude 3.5 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 84,
        "url": "https://benchlm.ai/models/claude-3-5-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
        "score": 39.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 76,
        "slug": "deepseek-r1",
        "canonicalModelKey": "deepseek-r1",
        "model": "DeepSeek-R1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 94,
        "url": "https://benchlm.ai/models/deepseek-r1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
        "score": 38.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 77,
        "slug": "gpt-4o",
        "canonicalModelKey": "gpt-4o",
        "model": "GPT-4o",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": true,
        "overallRank": 81,
        "url": "https://benchlm.ai/models/gpt-4o",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
        "score": 35.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 78,
        "slug": "gemini-3-1-flash-lite",
        "canonicalModelKey": "gemini-3-1-flash-lite",
        "model": "Gemini 3.1 Flash-Lite",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 47,
        "rankingEligible": true,
        "overallRank": 75,
        "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
        "score": 35.5,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 79,
        "slug": "gemini-1-5-pro",
        "canonicalModelKey": "gemini-1-5-pro",
        "model": "Gemini 1.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 35,
        "rankingEligible": true,
        "overallRank": 89,
        "url": "https://benchlm.ai/models/gemini-1-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
        "score": 35.5,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 80,
        "slug": "claude-4-1-opus-thinking",
        "canonicalModelKey": "claude-4-1-opus-thinking",
        "model": "Claude 4.1 Opus Thinking",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 80,
        "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
        "score": 35.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 81,
        "slug": "mistral-8x7b",
        "canonicalModelKey": "mistral-8x7b",
        "model": "Mistral 8x7B",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 110,
        "url": "https://benchlm.ai/models/mistral-8x7b",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
        "score": 35.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 82,
        "slug": "grok-code-fast-1",
        "canonicalModelKey": "grok-code-fast-1",
        "model": "Grok Code Fast 1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 85,
        "url": "https://benchlm.ai/models/grok-code-fast-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
        "score": 34.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 83,
        "slug": "grok-3-beta",
        "canonicalModelKey": "grok-3-beta",
        "model": "Grok 3 [Beta]",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 30,
        "rankingEligible": true,
        "overallRank": 97,
        "url": "https://benchlm.ai/models/grok-3-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
        "score": 34.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 84,
        "slug": "claude-3-opus",
        "canonicalModelKey": "claude-3-opus",
        "model": "Claude 3 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 92,
        "url": "https://benchlm.ai/models/claude-3-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
        "score": 31.5,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 85,
        "slug": "deepseek-v4-flash-base",
        "canonicalModelKey": "deepseek-v4-flash-base",
        "model": "DeepSeek V4 Flash Base",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 29,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/deepseek-v4-flash-base",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-base.md",
        "score": 30.1,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 24,
        "scoreConfidence": 2
      },
      {
        "rank": 86,
        "slug": "nemotron-3-nano-30b",
        "canonicalModelKey": "nemotron-3-nano-30b",
        "model": "Nemotron 3 Nano 30B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 104,
        "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
        "score": 29.8,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 87,
        "slug": "gpt-4-turbo",
        "canonicalModelKey": "gpt-4-turbo",
        "model": "GPT-4 Turbo",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 107,
        "url": "https://benchlm.ai/models/gpt-4-turbo",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
        "score": 28.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 88,
        "slug": "gemini-2-5-flash",
        "canonicalModelKey": "gemini-2-5-flash",
        "model": "Gemini 2.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 37,
        "rankingEligible": true,
        "overallRank": 88,
        "url": "https://benchlm.ai/models/gemini-2-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
        "score": 26.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 89,
        "slug": "z-1",
        "canonicalModelKey": "z-1",
        "model": "Z-1",
        "creator": "Z",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 109,
        "url": "https://benchlm.ai/models/z-1",
        "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
        "score": 26,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 90,
        "slug": "llama-3-70b",
        "canonicalModelKey": "llama-3-70b",
        "model": "Llama 3 70B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 105,
        "url": "https://benchlm.ai/models/llama-3-70b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
        "score": 25.8,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 91,
        "slug": "nemotron-4-15b",
        "canonicalModelKey": "nemotron-4-15b",
        "model": "Nemotron-4 15B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 113,
        "url": "https://benchlm.ai/models/nemotron-4-15b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
        "score": 25.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 92,
        "slug": "claude-3-haiku",
        "canonicalModelKey": "claude-3-haiku",
        "model": "Claude 3 Haiku",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 111,
        "url": "https://benchlm.ai/models/claude-3-haiku",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
        "score": 25.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 93,
        "slug": "moonshot-v1",
        "canonicalModelKey": "moonshot-v1",
        "model": "Moonshot v1",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 114,
        "url": "https://benchlm.ai/models/moonshot-v1",
        "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
        "score": 25.2,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 94,
        "slug": "gemini-1-0-pro",
        "canonicalModelKey": "gemini-1-0-pro",
        "model": "Gemini 1.0 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 108,
        "url": "https://benchlm.ai/models/gemini-1-0-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
        "score": 25,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 95,
        "slug": "nemotron-ultra-253b",
        "canonicalModelKey": "nemotron-ultra-253b",
        "model": "Nemotron Ultra 253B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 115,
        "url": "https://benchlm.ai/models/nemotron-ultra-253b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
        "score": 24.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 96,
        "slug": "gemma-3-27b",
        "canonicalModelKey": "gemma-3-27b",
        "model": "Gemma 3 27B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 118,
        "url": "https://benchlm.ai/models/gemma-3-27b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
        "score": 16.6,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 97,
        "slug": "llama-4-scout",
        "canonicalModelKey": "llama-4-scout",
        "model": "Llama 4 Scout",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "10M",
        "contextWindowTokens": 10000000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 103,
        "url": "https://benchlm.ai/models/llama-4-scout",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
        "score": 15.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 98,
        "slug": "llama-4-behemoth",
        "canonicalModelKey": "llama-4-behemoth",
        "model": "Llama 4 Behemoth",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": true,
        "overallRank": 120,
        "url": "https://benchlm.ai/models/llama-4-behemoth",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
        "score": 14.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 99,
        "slug": "llama-4-maverick",
        "canonicalModelKey": "llama-4-maverick",
        "model": "Llama 4 Maverick",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 17,
        "rankingEligible": true,
        "overallRank": 117,
        "url": "https://benchlm.ai/models/llama-4-maverick",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
        "score": 13.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 100,
        "slug": "deepseek-v3-1-reasoning",
        "canonicalModelKey": "deepseek-v3-1-reasoning",
        "model": "DeepSeek V3.1 (Reasoning)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 29,
        "rankingEligible": true,
        "overallRank": 98,
        "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
        "score": 13.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 101,
        "slug": "glm-4-5",
        "canonicalModelKey": "glm-4-5",
        "model": "GLM-4.5",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 102,
        "url": "https://benchlm.ai/models/glm-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
        "score": 11.7,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 102,
        "slug": "nova-pro",
        "canonicalModelKey": "nova-pro",
        "model": "Nova Pro",
        "creator": "Amazon",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 10,
        "rankingEligible": true,
        "overallRank": 121,
        "url": "https://benchlm.ai/models/nova-pro",
        "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
        "score": 10.3,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 103,
        "slug": "deepseek-v3-1",
        "canonicalModelKey": "deepseek-v3-1",
        "model": "DeepSeek V3.1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 106,
        "url": "https://benchlm.ai/models/deepseek-v3-1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
        "score": 9.8,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 104,
        "slug": "glm-4-5-air",
        "canonicalModelKey": "glm-4-5-air",
        "model": "GLM-4.5-Air",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 18,
        "rankingEligible": true,
        "overallRank": 116,
        "url": "https://benchlm.ai/models/glm-4-5-air",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
        "score": 8.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 105,
        "slug": "gpt-oss-20b",
        "canonicalModelKey": "gpt-oss-20b",
        "model": "GPT-OSS 20B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 119,
        "url": "https://benchlm.ai/models/gpt-oss-20b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
        "score": 5.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 106,
        "slug": "mistral-7b-v0-3",
        "canonicalModelKey": "mistral-7b-v0-3",
        "model": "Mistral 7B v0.3",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 4,
        "rankingEligible": true,
        "overallRank": 122,
        "url": "https://benchlm.ai/models/mistral-7b-v0-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
        "score": 4.9,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 107,
        "slug": "mistral-8x7b-v0-2",
        "canonicalModelKey": "mistral-8x7b-v0-2",
        "model": "Mistral 8x7B v0.2",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 1,
        "rankingEligible": true,
        "overallRank": 123,
        "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
        "score": 2.4,
        "category": "knowledge",
        "categoryLabel": "Knowledge",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      }
    ],
    "multilingual": [
      {
        "rank": 1,
        "slug": "claude-mythos-5",
        "canonicalModelKey": "claude-mythos-5",
        "model": "Claude Mythos 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 99,
        "rankingEligible": true,
        "overallRank": 1,
        "url": "https://benchlm.ai/models/claude-mythos-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-mythos-5.md",
        "score": 100,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 2,
        "slug": "claude-fable",
        "canonicalModelKey": "claude-fable-5",
        "model": "Claude Fable 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 97,
        "rankingEligible": true,
        "overallRank": 2,
        "url": "https://benchlm.ai/models/claude-fable",
        "markdownUrl": "https://benchlm.ai/md/models/claude-fable.md",
        "score": 100,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 3,
        "slug": "gemini-3-1-pro",
        "canonicalModelKey": "gemini-3-1-pro",
        "model": "Gemini 3.1 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 4,
        "url": "https://benchlm.ai/models/gemini-3-1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
        "score": 100,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 4,
        "slug": "gpt-5-4",
        "canonicalModelKey": "gpt-5-4",
        "model": "GPT-5.4",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 10,
        "url": "https://benchlm.ai/models/gpt-5-4",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
        "score": 100,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 26,
        "scoreConfidence": 4
      },
      {
        "rank": 5,
        "slug": "claude-opus-4-6",
        "canonicalModelKey": "claude-opus-4-6",
        "model": "Claude Opus 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 12,
        "url": "https://benchlm.ai/models/claude-opus-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
        "score": 100,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 27,
        "scoreConfidence": 4
      },
      {
        "rank": 6,
        "slug": "grok-4-1",
        "canonicalModelKey": "grok-4-1",
        "model": "Grok 4.1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 9,
        "url": "https://benchlm.ai/models/grok-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
        "score": 96.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 7,
        "slug": "gpt-5-3-codex",
        "canonicalModelKey": "gpt-5-3-codex",
        "model": "GPT-5.3 Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 85,
        "rankingEligible": true,
        "overallRank": 15,
        "url": "https://benchlm.ai/models/gpt-5-3-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
        "score": 96.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 8,
        "slug": "gpt-5-2",
        "canonicalModelKey": "gpt-5-2",
        "model": "GPT-5.2",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 78,
        "rankingEligible": true,
        "overallRank": 25,
        "url": "https://benchlm.ai/models/gpt-5-2",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
        "score": 95.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 2
      },
      {
        "rank": 9,
        "slug": "claude-sonnet-4-6",
        "canonicalModelKey": "claude-sonnet-4-6",
        "model": "Claude Sonnet 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 18,
        "url": "https://benchlm.ai/models/claude-sonnet-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
        "score": 88.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 10,
        "slug": "kimi-k2-5-reasoning",
        "canonicalModelKey": "kimi-k2-5-reasoning",
        "model": "Kimi K2.5 (Reasoning)",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 31,
        "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
        "score": 87.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 11,
        "slug": "gpt-5-2-codex",
        "canonicalModelKey": "gpt-5-2-codex",
        "model": "GPT-5.2-Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 30,
        "url": "https://benchlm.ai/models/gpt-5-2-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
        "score": 84.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 12,
        "slug": "claude-sonnet-4-5",
        "canonicalModelKey": "claude-sonnet-4-5",
        "model": "Claude Sonnet 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 64,
        "rankingEligible": true,
        "overallRank": 46,
        "url": "https://benchlm.ai/models/claude-sonnet-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
        "score": 84.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 13,
        "slug": "qwen3-7-max",
        "canonicalModelKey": "qwen3-7-max",
        "model": "Qwen3.7 Max",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 5,
        "url": "https://benchlm.ai/models/qwen3-7-max",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-max.md",
        "score": 84,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 3
      },
      {
        "rank": 14,
        "slug": "gpt-5-medium",
        "canonicalModelKey": "gpt-5-medium",
        "model": "GPT-5 (medium)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 70,
        "rankingEligible": true,
        "overallRank": 37,
        "url": "https://benchlm.ai/models/gpt-5-medium",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
        "score": 83.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 15,
        "slug": "qwen3-5-397b-reasoning",
        "canonicalModelKey": "qwen3-5-397b-reasoning",
        "model": "Qwen3.5 397B (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 26,
        "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
        "score": 82.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 16,
        "slug": "gpt-5-1",
        "canonicalModelKey": "gpt-5-1",
        "model": "GPT-5.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 27,
        "url": "https://benchlm.ai/models/gpt-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
        "score": 82.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 17,
        "slug": "gpt-5-1-codex-max",
        "canonicalModelKey": "gpt-5-1-codex-max",
        "model": "GPT-5.1-Codex-Max",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 32,
        "url": "https://benchlm.ai/models/gpt-5-1-codex-max",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex-max.md",
        "score": 82.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 18,
        "slug": "gemini-3-pro-deep-think",
        "canonicalModelKey": "gemini-3-pro-deep-think",
        "model": "Gemini 3 Pro Deep Think",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 8,
        "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
        "score": 82.1,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 19,
        "slug": "o1-preview",
        "canonicalModelKey": "o1-preview",
        "model": "o1-preview",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 20,
        "url": "https://benchlm.ai/models/o1-preview",
        "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
        "score": 82,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 20,
        "slug": "claude-opus-4-5",
        "canonicalModelKey": "claude-opus-4-5",
        "model": "Claude Opus 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 28,
        "url": "https://benchlm.ai/models/claude-opus-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
        "score": 81.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 41,
        "scoreConfidence": 4
      },
      {
        "rank": 21,
        "slug": "qwen3-7-plus",
        "canonicalModelKey": "qwen3-7-plus",
        "model": "Qwen3.7 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 11,
        "url": "https://benchlm.ai/models/qwen3-7-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-plus.md",
        "score": 79.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 50,
        "scoreConfidence": 4
      },
      {
        "rank": 22,
        "slug": "gemini-3-pro",
        "canonicalModelKey": "gemini-3-pro",
        "model": "Gemini 3 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 80,
        "rankingEligible": true,
        "overallRank": 22,
        "url": "https://benchlm.ai/models/gemini-3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
        "score": 79.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 23,
        "slug": "glm-5-reasoning",
        "canonicalModelKey": "glm-5-reasoning",
        "model": "GLM-5 (Reasoning)",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 24,
        "url": "https://benchlm.ai/models/glm-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
        "score": 79.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 24,
        "slug": "gpt-5-high",
        "canonicalModelKey": "gpt-5-high",
        "model": "GPT-5 (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 29,
        "url": "https://benchlm.ai/models/gpt-5-high",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
        "score": 79.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 25,
        "slug": "qwen3-6-plus",
        "canonicalModelKey": "qwen3-6-plus",
        "model": "Qwen3.6 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 66,
        "rankingEligible": true,
        "overallRank": 43,
        "url": "https://benchlm.ai/models/qwen3-6-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-plus.md",
        "score": 77.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 38,
        "scoreConfidence": 4
      },
      {
        "rank": 26,
        "slug": "grok-4-1-fast",
        "canonicalModelKey": "grok-4-1-fast",
        "model": "Grok 4.1 Fast",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 41,
        "url": "https://benchlm.ai/models/grok-4-1-fast",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
        "score": 74.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 27,
        "slug": "nemotron-3-ultra",
        "canonicalModelKey": "nemotron-3-ultra-500b",
        "model": "Nemotron 3 Ultra",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 38,
        "url": "https://benchlm.ai/models/nemotron-3-ultra",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-ultra.md",
        "score": 72.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 18,
        "scoreConfidence": 3
      },
      {
        "rank": 28,
        "slug": "qwen3-5-397b",
        "canonicalModelKey": "qwen3-5-397b",
        "model": "Qwen3.5 397B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 62,
        "rankingEligible": true,
        "overallRank": 51,
        "url": "https://benchlm.ai/models/qwen3-5-397b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
        "score": 71.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 36,
        "scoreConfidence": 4
      },
      {
        "rank": 29,
        "slug": "deepseek-v4-flash-base",
        "canonicalModelKey": "deepseek-v4-flash-base",
        "model": "DeepSeek V4 Flash Base",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 29,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/deepseek-v4-flash-base",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-flash-base.md",
        "score": 71.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 24,
        "scoreConfidence": 2
      },
      {
        "rank": 30,
        "slug": "glm-4-7",
        "canonicalModelKey": "glm-4-7",
        "model": "GLM-4.7",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 40,
        "url": "https://benchlm.ai/models/glm-4-7",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
        "score": 71.3,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 31,
        "slug": "glm-5",
        "canonicalModelKey": "glm-5",
        "model": "GLM-5",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 67,
        "rankingEligible": true,
        "overallRank": 42,
        "url": "https://benchlm.ai/models/glm-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
        "score": 70.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 4
      },
      {
        "rank": 32,
        "slug": "qwen3-5-122b-a10b",
        "canonicalModelKey": "qwen3-5-122b-a10b",
        "model": "Qwen3.5-122B-A10B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 48,
        "url": "https://benchlm.ai/models/qwen3-5-122b-a10b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-122b-a10b.md",
        "score": 70.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 33,
        "slug": "qwen3-5-27b",
        "canonicalModelKey": "qwen3-5-27b",
        "model": "Qwen3.5-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 61,
        "rankingEligible": true,
        "overallRank": 52,
        "url": "https://benchlm.ai/models/qwen3-5-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-27b.md",
        "score": 70.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 34,
        "slug": "gemini-2-5-pro",
        "canonicalModelKey": "gemini-2-5-pro",
        "model": "Gemini 2.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 49,
        "url": "https://benchlm.ai/models/gemini-2-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
        "score": 68.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 35,
        "slug": "kimi-k2-5",
        "canonicalModelKey": "kimi-k2-5",
        "model": "Kimi K2.5",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 47,
        "url": "https://benchlm.ai/models/kimi-k2-5",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
        "score": 68.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 39,
        "scoreConfidence": 4
      },
      {
        "rank": 36,
        "slug": "deepseekmath-v2",
        "canonicalModelKey": "deepseekmath-v2",
        "model": "DeepSeekMath V2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 73,
        "url": "https://benchlm.ai/models/deepseekmath-v2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
        "score": 68.1,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 37,
        "slug": "deepseek-v4-pro-base",
        "canonicalModelKey": "deepseek-v4-pro-base",
        "model": "DeepSeek V4 Pro Base",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 41,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/deepseek-v4-pro-base",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v4-pro-base.md",
        "score": 67.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 24,
        "scoreConfidence": 2
      },
      {
        "rank": 38,
        "slug": "qwen3-5-35b-a3b",
        "canonicalModelKey": "qwen3-5-35b-a3b",
        "model": "Qwen3.5-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 62,
        "url": "https://benchlm.ai/models/qwen3-5-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-35b-a3b.md",
        "score": 67.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 39,
        "slug": "deepseek-v3-2",
        "canonicalModelKey": "deepseek-v3-2",
        "model": "DeepSeek V3.2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 59,
        "url": "https://benchlm.ai/models/deepseek-v3-2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
        "score": 67,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 40,
        "slug": "claude-4-sonnet",
        "canonicalModelKey": "claude-4-sonnet",
        "model": "Claude 4 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 70,
        "url": "https://benchlm.ai/models/claude-4-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
        "score": 67,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 41,
        "slug": "claude-4-1-opus",
        "canonicalModelKey": "claude-4-1-opus",
        "model": "Claude 4.1 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 67,
        "url": "https://benchlm.ai/models/claude-4-1-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
        "score": 66.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 42,
        "slug": "o4-mini-high",
        "canonicalModelKey": "o4-mini-high",
        "model": "o4-mini (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 79,
        "url": "https://benchlm.ai/models/o4-mini-high",
        "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
        "score": 66.1,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 43,
        "slug": "o3-pro",
        "canonicalModelKey": "o3-pro",
        "model": "o3-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 57,
        "url": "https://benchlm.ai/models/o3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
        "score": 64.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 44,
        "slug": "o3",
        "canonicalModelKey": "o3",
        "model": "o3",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 61,
        "url": "https://benchlm.ai/models/o3",
        "markdownUrl": "https://benchlm.ai/md/models/o3.md",
        "score": 64.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 45,
        "slug": "grok-4",
        "canonicalModelKey": "grok-4",
        "model": "Grok 4",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 50,
        "url": "https://benchlm.ai/models/grok-4",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
        "score": 63.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 46,
        "slug": "deepseek-v3-2-thinking",
        "canonicalModelKey": "deepseek-v3-2-thinking",
        "model": "DeepSeek V3.2 (Thinking)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 60,
        "rankingEligible": true,
        "overallRank": 53,
        "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
        "score": 63.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 47,
        "slug": "qwen2-5-72b",
        "canonicalModelKey": "qwen2-5-72b",
        "model": "Qwen2.5-72B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 72,
        "url": "https://benchlm.ai/models/qwen2-5-72b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
        "score": 63.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 48,
        "slug": "gemini-3-flash",
        "canonicalModelKey": "gemini-3-flash",
        "model": "Gemini 3 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 63,
        "url": "https://benchlm.ai/models/gemini-3-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
        "score": 62.5,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 49,
        "slug": "claude-3-5-sonnet",
        "canonicalModelKey": "claude-3-5-sonnet",
        "model": "Claude 3.5 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 84,
        "url": "https://benchlm.ai/models/claude-3-5-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
        "score": 62.5,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 50,
        "slug": "qwen2-5-1m",
        "canonicalModelKey": "qwen2-5-1m",
        "model": "Qwen2.5-1M",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 69,
        "url": "https://benchlm.ai/models/qwen2-5-1m",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
        "score": 62.3,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 51,
        "slug": "llama-3-1-405b",
        "canonicalModelKey": "llama-3-1-405b",
        "model": "Llama 3.1 405B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 83,
        "url": "https://benchlm.ai/models/llama-3-1-405b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
        "score": 61.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 52,
        "slug": "claude-haiku-4-5",
        "canonicalModelKey": "claude-haiku-4-5",
        "model": "Claude Haiku 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 60,
        "url": "https://benchlm.ai/models/claude-haiku-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
        "score": 61.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 53,
        "slug": "deepseek-coder-2-0",
        "canonicalModelKey": "deepseek-coder-2-0",
        "model": "DeepSeek Coder 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 66,
        "url": "https://benchlm.ai/models/deepseek-coder-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
        "score": 60.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 54,
        "slug": "nemotron-3-super-100b",
        "canonicalModelKey": "nemotron-3-super-100b",
        "model": "Nemotron 3 Super 100B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 78,
        "url": "https://benchlm.ai/models/nemotron-3-super-100b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
        "score": 59.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 55,
        "slug": "mimo-v2-flash",
        "canonicalModelKey": "mimo-v2-flash",
        "model": "MiMo-V2-Flash",
        "creator": "Xiaomi",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 59,
        "rankingEligible": true,
        "overallRank": 54,
        "url": "https://benchlm.ai/models/mimo-v2-flash",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
        "score": 58.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 56,
        "slug": "mistral-large-2",
        "canonicalModelKey": "mistral-large-2",
        "model": "Mistral Large 2",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 38,
        "rankingEligible": true,
        "overallRank": 87,
        "url": "https://benchlm.ai/models/mistral-large-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
        "score": 58.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 57,
        "slug": "deepseek-llm-2-0",
        "canonicalModelKey": "deepseek-llm-2-0",
        "model": "DeepSeek LLM 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 68,
        "url": "https://benchlm.ai/models/deepseek-llm-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
        "score": 57.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 58,
        "slug": "mistral-large-3",
        "canonicalModelKey": "mistral-large-3",
        "model": "Mistral Large 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 48,
        "rankingEligible": true,
        "overallRank": 74,
        "url": "https://benchlm.ai/models/mistral-large-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
        "score": 57.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 59,
        "slug": "o1",
        "canonicalModelKey": "o1",
        "model": "o1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 58,
        "url": "https://benchlm.ai/models/o1",
        "markdownUrl": "https://benchlm.ai/md/models/o1.md",
        "score": 56,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 60,
        "slug": "claude-4-1-opus-thinking",
        "canonicalModelKey": "claude-4-1-opus-thinking",
        "model": "Claude 4.1 Opus Thinking",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 80,
        "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
        "score": 50.5,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 61,
        "slug": "gpt-4o",
        "canonicalModelKey": "gpt-4o",
        "model": "GPT-4o",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": true,
        "overallRank": 81,
        "url": "https://benchlm.ai/models/gpt-4o",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
        "score": 48.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 62,
        "slug": "gpt-4o-mini",
        "canonicalModelKey": "gpt-4o-mini",
        "model": "GPT-4o mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 71,
        "url": "https://benchlm.ai/models/gpt-4o-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o-mini.md",
        "score": 46.3,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 63,
        "slug": "qwen3-235b-2507-reasoning",
        "canonicalModelKey": "qwen3-235b-2507-reasoning",
        "model": "Qwen3 235B 2507 (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 76,
        "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
        "score": 45.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 64,
        "slug": "o3-mini",
        "canonicalModelKey": "o3-mini",
        "model": "o3-mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 64,
        "url": "https://benchlm.ai/models/o3-mini",
        "markdownUrl": "https://benchlm.ai/md/models/o3-mini.md",
        "score": 44.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 5,
        "scoreConfidence": 1
      },
      {
        "rank": 65,
        "slug": "grok-code-fast-1",
        "canonicalModelKey": "grok-code-fast-1",
        "model": "Grok Code Fast 1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 85,
        "url": "https://benchlm.ai/models/grok-code-fast-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
        "score": 43.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 66,
        "slug": "qwen3-235b-2507",
        "canonicalModelKey": "qwen3-235b-2507",
        "model": "Qwen3 235B 2507",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 95,
        "url": "https://benchlm.ai/models/qwen3-235b-2507",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
        "score": 43.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 67,
        "slug": "gpt-4-1-mini",
        "canonicalModelKey": "gpt-4-1-mini",
        "model": "GPT-4.1 mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 77,
        "url": "https://benchlm.ai/models/gpt-4-1-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-mini.md",
        "score": 42,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 68,
        "slug": "z-1",
        "canonicalModelKey": "z-1",
        "model": "Z-1",
        "creator": "Z",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 109,
        "url": "https://benchlm.ai/models/z-1",
        "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
        "score": 40.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 69,
        "slug": "nemotron-4-15b",
        "canonicalModelKey": "nemotron-4-15b",
        "model": "Nemotron-4 15B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 113,
        "url": "https://benchlm.ai/models/nemotron-4-15b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
        "score": 40.1,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 70,
        "slug": "mistral-8x7b",
        "canonicalModelKey": "mistral-8x7b",
        "model": "Mistral 8x7B",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 110,
        "url": "https://benchlm.ai/models/mistral-8x7b",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
        "score": 39.1,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 71,
        "slug": "nemotron-3-nano-30b",
        "canonicalModelKey": "nemotron-3-nano-30b",
        "model": "Nemotron 3 Nano 30B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 104,
        "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
        "score": 38.3,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 72,
        "slug": "claude-3-haiku",
        "canonicalModelKey": "claude-3-haiku",
        "model": "Claude 3 Haiku",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 111,
        "url": "https://benchlm.ai/models/claude-3-haiku",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
        "score": 36.3,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 73,
        "slug": "gemini-2-5-flash",
        "canonicalModelKey": "gemini-2-5-flash",
        "model": "Gemini 2.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 37,
        "rankingEligible": true,
        "overallRank": 88,
        "url": "https://benchlm.ai/models/gemini-2-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
        "score": 35.5,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 74,
        "slug": "gpt-oss-120b",
        "canonicalModelKey": "gpt-oss-120b",
        "model": "GPT-OSS 120B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 91,
        "url": "https://benchlm.ai/models/gpt-oss-120b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
        "score": 35.3,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 75,
        "slug": "gpt-4-1",
        "canonicalModelKey": "gpt-4-1",
        "model": "GPT-4.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 56,
        "url": "https://benchlm.ai/models/gpt-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1.md",
        "score": 33.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 76,
        "slug": "nemotron-ultra-253b",
        "canonicalModelKey": "nemotron-ultra-253b",
        "model": "Nemotron Ultra 253B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 115,
        "url": "https://benchlm.ai/models/nemotron-ultra-253b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
        "score": 33.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 77,
        "slug": "gemini-3-1-flash-lite",
        "canonicalModelKey": "gemini-3-1-flash-lite",
        "model": "Gemini 3.1 Flash-Lite",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 47,
        "rankingEligible": true,
        "overallRank": 75,
        "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
        "score": 32.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 78,
        "slug": "claude-3-opus",
        "canonicalModelKey": "claude-3-opus",
        "model": "Claude 3 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 92,
        "url": "https://benchlm.ai/models/claude-3-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
        "score": 32.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 79,
        "slug": "moonshot-v1",
        "canonicalModelKey": "moonshot-v1",
        "model": "Moonshot v1",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 114,
        "url": "https://benchlm.ai/models/moonshot-v1",
        "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
        "score": 32.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 80,
        "slug": "gemini-1-5-pro",
        "canonicalModelKey": "gemini-1-5-pro",
        "model": "Gemini 1.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 35,
        "rankingEligible": true,
        "overallRank": 89,
        "url": "https://benchlm.ai/models/gemini-1-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
        "score": 31.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 81,
        "slug": "llama-3-70b",
        "canonicalModelKey": "llama-3-70b",
        "model": "Llama 3 70B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 105,
        "url": "https://benchlm.ai/models/llama-3-70b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
        "score": 26.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 82,
        "slug": "phi-4",
        "canonicalModelKey": "phi-4",
        "model": "Phi-4",
        "creator": "Microsoft",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "16K",
        "contextWindowTokens": 16000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 100,
        "url": "https://benchlm.ai/models/phi-4",
        "markdownUrl": "https://benchlm.ai/md/models/phi-4.md",
        "score": 25.5,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 83,
        "slug": "gemini-1-0-pro",
        "canonicalModelKey": "gemini-1-0-pro",
        "model": "Gemini 1.0 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 108,
        "url": "https://benchlm.ai/models/gemini-1-0-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
        "score": 24.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 84,
        "slug": "gpt-4-turbo",
        "canonicalModelKey": "gpt-4-turbo",
        "model": "GPT-4 Turbo",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 107,
        "url": "https://benchlm.ai/models/gpt-4-turbo",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
        "score": 22.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 85,
        "slug": "llama-4-behemoth",
        "canonicalModelKey": "llama-4-behemoth",
        "model": "Llama 4 Behemoth",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": true,
        "overallRank": 120,
        "url": "https://benchlm.ai/models/llama-4-behemoth",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
        "score": 13.1,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 86,
        "slug": "deepseek-v3-1-reasoning",
        "canonicalModelKey": "deepseek-v3-1-reasoning",
        "model": "DeepSeek V3.1 (Reasoning)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 29,
        "rankingEligible": true,
        "overallRank": 98,
        "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
        "score": 11.2,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 87,
        "slug": "gemma-3-27b",
        "canonicalModelKey": "gemma-3-27b",
        "model": "Gemma 3 27B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 118,
        "url": "https://benchlm.ai/models/gemma-3-27b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
        "score": 11.1,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 88,
        "slug": "nova-pro",
        "canonicalModelKey": "nova-pro",
        "model": "Nova Pro",
        "creator": "Amazon",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 10,
        "rankingEligible": true,
        "overallRank": 121,
        "url": "https://benchlm.ai/models/nova-pro",
        "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
        "score": 8.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 89,
        "slug": "deepseek-v3-1",
        "canonicalModelKey": "deepseek-v3-1",
        "model": "DeepSeek V3.1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 106,
        "url": "https://benchlm.ai/models/deepseek-v3-1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
        "score": 7.5,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 90,
        "slug": "mistral-7b-v0-3",
        "canonicalModelKey": "mistral-7b-v0-3",
        "model": "Mistral 7B v0.3",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 4,
        "rankingEligible": true,
        "overallRank": 122,
        "url": "https://benchlm.ai/models/mistral-7b-v0-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
        "score": 7.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 91,
        "slug": "deepseek-r1",
        "canonicalModelKey": "deepseek-r1",
        "model": "DeepSeek-R1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 94,
        "url": "https://benchlm.ai/models/deepseek-r1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
        "score": 6.4,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 92,
        "slug": "gpt-4-1-nano",
        "canonicalModelKey": "gpt-4-1-nano",
        "model": "GPT-4.1 nano",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 101,
        "url": "https://benchlm.ai/models/gpt-4-1-nano",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-nano.md",
        "score": 5.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 93,
        "slug": "llama-4-scout",
        "canonicalModelKey": "llama-4-scout",
        "model": "Llama 4 Scout",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "10M",
        "contextWindowTokens": 10000000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 103,
        "url": "https://benchlm.ai/models/llama-4-scout",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
        "score": 4.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 94,
        "slug": "llama-4-maverick",
        "canonicalModelKey": "llama-4-maverick",
        "model": "Llama 4 Maverick",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 17,
        "rankingEligible": true,
        "overallRank": 117,
        "url": "https://benchlm.ai/models/llama-4-maverick",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
        "score": 4.7,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 95,
        "slug": "gpt-oss-20b",
        "canonicalModelKey": "gpt-oss-20b",
        "model": "GPT-OSS 20B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 119,
        "url": "https://benchlm.ai/models/gpt-oss-20b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
        "score": 4.6,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 96,
        "slug": "glm-4-5-air",
        "canonicalModelKey": "glm-4-5-air",
        "model": "GLM-4.5-Air",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 18,
        "rankingEligible": true,
        "overallRank": 116,
        "url": "https://benchlm.ai/models/glm-4-5-air",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
        "score": 2.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 97,
        "slug": "mistral-8x7b-v0-2",
        "canonicalModelKey": "mistral-8x7b-v0-2",
        "model": "Mistral 8x7B v0.2",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 1,
        "rankingEligible": true,
        "overallRank": 123,
        "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
        "score": 1.9,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 98,
        "slug": "grok-3-beta",
        "canonicalModelKey": "grok-3-beta",
        "model": "Grok 3 [Beta]",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 30,
        "rankingEligible": true,
        "overallRank": 97,
        "url": "https://benchlm.ai/models/grok-3-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
        "score": 1.8,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 99,
        "slug": "dbrx-instruct",
        "canonicalModelKey": "dbrx-instruct",
        "model": "DBRX Instruct",
        "creator": "Databricks",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 96,
        "url": "https://benchlm.ai/models/dbrx-instruct",
        "markdownUrl": "https://benchlm.ai/md/models/dbrx-instruct.md",
        "score": 0,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 100,
        "slug": "o1-pro",
        "canonicalModelKey": "o1-pro",
        "model": "o1-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 28,
        "rankingEligible": true,
        "overallRank": 99,
        "url": "https://benchlm.ai/models/o1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o1-pro.md",
        "score": 0,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 101,
        "slug": "glm-4-5",
        "canonicalModelKey": "glm-4-5",
        "model": "GLM-4.5",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 102,
        "url": "https://benchlm.ai/models/glm-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
        "score": 0,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 102,
        "slug": "mixtral-8x22b-instruct-v0-1",
        "canonicalModelKey": "mixtral-8x22b-instruct-v0-1",
        "model": "Mixtral 8x22B Instruct v0.1",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "64K",
        "contextWindowTokens": 64000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 112,
        "url": "https://benchlm.ai/models/mixtral-8x22b-instruct-v0-1",
        "markdownUrl": "https://benchlm.ai/md/models/mixtral-8x22b-instruct-v0-1.md",
        "score": 0,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 103,
        "slug": "granite-4-0-1b",
        "canonicalModelKey": "granite-4-0-1b",
        "model": "Granite-4.0-1B",
        "creator": "IBM",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/granite-4-0-1b",
        "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-1b.md",
        "score": 0,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 104,
        "slug": "granite-4-0-h-1b",
        "canonicalModelKey": "granite-4-0-h-1b",
        "model": "Granite-4.0-H-1B",
        "creator": "IBM",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 9,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/granite-4-0-h-1b",
        "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-h-1b.md",
        "score": 0,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 105,
        "slug": "granite-4-0-350m",
        "canonicalModelKey": "granite-4-0-350m",
        "model": "Granite-4.0-350M",
        "creator": "IBM",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 0,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/granite-4-0-350m",
        "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-350m.md",
        "score": 0,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 106,
        "slug": "granite-4-0-h-350m",
        "canonicalModelKey": "granite-4-0-h-350m",
        "model": "Granite-4.0-H-350M",
        "creator": "IBM",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 0,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/granite-4-0-h-350m",
        "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-h-350m.md",
        "score": 0,
        "category": "multilingual",
        "categoryLabel": "Multilingual",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      }
    ],
    "instructionFollowing": [
      {
        "rank": 1,
        "slug": "mai-thinking-1",
        "canonicalModelKey": "mai-thinking-1",
        "model": "MAI-Thinking-1",
        "creator": "Microsoft",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 65,
        "rankingEligible": true,
        "overallRank": 44,
        "url": "https://benchlm.ai/models/mai-thinking-1",
        "markdownUrl": "https://benchlm.ai/md/models/mai-thinking-1.md",
        "score": 100,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 2,
        "slug": "grok-4-20-multi-agent-beta",
        "canonicalModelKey": "grok-4-20-multi-agent-beta",
        "model": "Grok 4.20 Multi-agent",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 70,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/grok-4-20-multi-agent-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-20-multi-agent-beta.md",
        "score": 100,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 3,
        "slug": "kimi-k2-5-reasoning",
        "canonicalModelKey": "kimi-k2-5-reasoning",
        "model": "Kimi K2.5 (Reasoning)",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 31,
        "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
        "score": 98.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 4,
        "slug": "gpt-5-4",
        "canonicalModelKey": "gpt-5-4",
        "model": "GPT-5.4",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 10,
        "url": "https://benchlm.ai/models/gpt-5-4",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
        "score": 96,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 26,
        "scoreConfidence": 4
      },
      {
        "rank": 5,
        "slug": "nemotron-3-ultra",
        "canonicalModelKey": "nemotron-3-ultra-500b",
        "model": "Nemotron 3 Ultra",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 38,
        "url": "https://benchlm.ai/models/nemotron-3-ultra",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-ultra.md",
        "score": 96,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 18,
        "scoreConfidence": 3
      },
      {
        "rank": 6,
        "slug": "qwen3-7-plus",
        "canonicalModelKey": "qwen3-7-plus",
        "model": "Qwen3.7 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 11,
        "url": "https://benchlm.ai/models/qwen3-7-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-plus.md",
        "score": 95.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 50,
        "scoreConfidence": 4
      },
      {
        "rank": 7,
        "slug": "grok-4-20-beta",
        "canonicalModelKey": "grok-4-20-beta",
        "model": "Grok 4.20",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 71,
        "rankingEligible": true,
        "overallRank": 35,
        "url": "https://benchlm.ai/models/grok-4-20-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-20-beta.md",
        "score": 95.4,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 16,
        "scoreConfidence": 3
      },
      {
        "rank": 8,
        "slug": "claude-opus-4-6",
        "canonicalModelKey": "claude-opus-4-6",
        "model": "Claude Opus 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 12,
        "url": "https://benchlm.ai/models/claude-opus-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
        "score": 95.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 27,
        "scoreConfidence": 4
      },
      {
        "rank": 9,
        "slug": "gpt-5-4-pro",
        "canonicalModelKey": "gpt-5-4-pro",
        "model": "GPT-5.4 Pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 90,
        "rankingEligible": true,
        "overallRank": 6,
        "url": "https://benchlm.ai/models/gpt-5-4-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4-pro.md",
        "score": 93.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 9,
        "scoreConfidence": 2
      },
      {
        "rank": 10,
        "slug": "gemini-3-1-pro",
        "canonicalModelKey": "gemini-3-1-pro",
        "model": "Gemini 3.1 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 4,
        "url": "https://benchlm.ai/models/gemini-3-1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
        "score": 93.4,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 11,
        "slug": "qwen3-7-max",
        "canonicalModelKey": "qwen3-7-max",
        "model": "Qwen3.7 Max",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 5,
        "url": "https://benchlm.ai/models/qwen3-7-max",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-7-max.md",
        "score": 93.4,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 3
      },
      {
        "rank": 12,
        "slug": "claude-fable",
        "canonicalModelKey": "claude-fable-5",
        "model": "Claude Fable 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 97,
        "rankingEligible": true,
        "overallRank": 2,
        "url": "https://benchlm.ai/models/claude-fable",
        "markdownUrl": "https://benchlm.ai/md/models/claude-fable.md",
        "score": 92.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 13,
        "slug": "o1",
        "canonicalModelKey": "o1",
        "model": "o1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 58,
        "url": "https://benchlm.ai/models/o1",
        "markdownUrl": "https://benchlm.ai/md/models/o1.md",
        "score": 92.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 14,
        "slug": "glm-5-1",
        "canonicalModelKey": "glm-5-1",
        "model": "GLM-5.1",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "203K",
        "contextWindowTokens": 203000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 17,
        "url": "https://benchlm.ai/models/glm-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-1.md",
        "score": 92,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 16,
        "scoreConfidence": 3
      },
      {
        "rank": 15,
        "slug": "qwen3-6-plus",
        "canonicalModelKey": "qwen3-6-plus",
        "model": "Qwen3.6 Plus",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 66,
        "rankingEligible": true,
        "overallRank": 43,
        "url": "https://benchlm.ai/models/qwen3-6-plus",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-6-plus.md",
        "score": 91.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 38,
        "scoreConfidence": 4
      },
      {
        "rank": 16,
        "slug": "gpt-5-2-codex",
        "canonicalModelKey": "gpt-5-2-codex",
        "model": "GPT-5.2-Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 30,
        "url": "https://benchlm.ai/models/gpt-5-2-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
        "score": 91.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 17,
        "slug": "claude-mythos-5",
        "canonicalModelKey": "claude-mythos-5",
        "model": "Claude Mythos 5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M+",
        "contextWindowTokens": 1000000,
        "displayScore": 99,
        "rankingEligible": true,
        "overallRank": 1,
        "url": "https://benchlm.ai/models/claude-mythos-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-mythos-5.md",
        "score": 91.4,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 18,
        "slug": "gpt-5-3-codex",
        "canonicalModelKey": "gpt-5-3-codex",
        "model": "GPT-5.3 Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 85,
        "rankingEligible": true,
        "overallRank": 15,
        "url": "https://benchlm.ai/models/gpt-5-3-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
        "score": 91.4,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 19,
        "slug": "grok-4-1",
        "canonicalModelKey": "grok-4-1",
        "model": "Grok 4.1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 9,
        "url": "https://benchlm.ai/models/grok-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
        "score": 90.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 20,
        "slug": "qwen3-5-27b",
        "canonicalModelKey": "qwen3-5-27b",
        "model": "Qwen3.5-27B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 61,
        "rankingEligible": true,
        "overallRank": 52,
        "url": "https://benchlm.ai/models/qwen3-5-27b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-27b.md",
        "score": 89,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 21,
        "slug": "gpt-5-1-codex-max",
        "canonicalModelKey": "gpt-5-1-codex-max",
        "model": "GPT-5.1-Codex-Max",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 32,
        "url": "https://benchlm.ai/models/gpt-5-1-codex-max",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex-max.md",
        "score": 87.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 22,
        "slug": "qwen3-5-122b-a10b",
        "canonicalModelKey": "qwen3-5-122b-a10b",
        "model": "Qwen3.5-122B-A10B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 48,
        "url": "https://benchlm.ai/models/qwen3-5-122b-a10b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-122b-a10b.md",
        "score": 87.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 23,
        "slug": "grok-4-3",
        "canonicalModelKey": "grok-4-3",
        "model": "Grok 4.3",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 74,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/grok-4-3",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-3.md",
        "score": 86.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 24,
        "slug": "o3-mini",
        "canonicalModelKey": "o3-mini",
        "model": "o3-mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 64,
        "url": "https://benchlm.ai/models/o3-mini",
        "markdownUrl": "https://benchlm.ai/md/models/o3-mini.md",
        "score": 85.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 5,
        "scoreConfidence": 1
      },
      {
        "rank": 25,
        "slug": "gpt-5-2",
        "canonicalModelKey": "gpt-5-2",
        "model": "GPT-5.2",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 78,
        "rankingEligible": true,
        "overallRank": 25,
        "url": "https://benchlm.ai/models/gpt-5-2",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
        "score": 84.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 2
      },
      {
        "rank": 26,
        "slug": "qwen3-5-397b",
        "canonicalModelKey": "qwen3-5-397b",
        "model": "Qwen3.5 397B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 62,
        "rankingEligible": true,
        "overallRank": 51,
        "url": "https://benchlm.ai/models/qwen3-5-397b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
        "score": 84.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 36,
        "scoreConfidence": 4
      },
      {
        "rank": 27,
        "slug": "kimi-k2-5",
        "canonicalModelKey": "kimi-k2-5",
        "model": "Kimi K2.5",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 47,
        "url": "https://benchlm.ai/models/kimi-k2-5",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
        "score": 84.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 39,
        "scoreConfidence": 4
      },
      {
        "rank": 28,
        "slug": "glm-5",
        "canonicalModelKey": "glm-5",
        "model": "GLM-5",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 67,
        "rankingEligible": true,
        "overallRank": 42,
        "url": "https://benchlm.ai/models/glm-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
        "score": 84.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 4
      },
      {
        "rank": 29,
        "slug": "claude-sonnet-4-5",
        "canonicalModelKey": "claude-sonnet-4-5",
        "model": "Claude Sonnet 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 64,
        "rankingEligible": true,
        "overallRank": 46,
        "url": "https://benchlm.ai/models/claude-sonnet-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
        "score": 84.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 30,
        "slug": "kimi-k2",
        "canonicalModelKey": "kimi-k2",
        "model": "Kimi K2",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 41,
        "rankingEligible": true,
        "overallRank": 82,
        "url": "https://benchlm.ai/models/kimi-k2",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2.md",
        "score": 83.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 31,
        "slug": "claude-sonnet-4-6",
        "canonicalModelKey": "claude-sonnet-4-6",
        "model": "Claude Sonnet 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 18,
        "url": "https://benchlm.ai/models/claude-sonnet-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
        "score": 82.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 32,
        "slug": "mistral-medium-3",
        "canonicalModelKey": "mistral-medium-3",
        "model": "Mistral Medium 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/mistral-medium-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-medium-3.md",
        "score": 82.1,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 33,
        "slug": "gemini-3-pro-deep-think",
        "canonicalModelKey": "gemini-3-pro-deep-think",
        "model": "Gemini 3 Pro Deep Think",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 8,
        "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
        "score": 81.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 34,
        "slug": "qwen3-5-35b-a3b",
        "canonicalModelKey": "qwen3-5-35b-a3b",
        "model": "Qwen3.5-35B-A3B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 62,
        "url": "https://benchlm.ai/models/qwen3-5-35b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-35b-a3b.md",
        "score": 81.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 13,
        "scoreConfidence": 3
      },
      {
        "rank": 35,
        "slug": "glm-5-reasoning",
        "canonicalModelKey": "glm-5-reasoning",
        "model": "GLM-5 (Reasoning)",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 24,
        "url": "https://benchlm.ai/models/glm-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
        "score": 81,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 36,
        "slug": "gpt-5-high",
        "canonicalModelKey": "gpt-5-high",
        "model": "GPT-5 (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 29,
        "url": "https://benchlm.ai/models/gpt-5-high",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
        "score": 80.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 37,
        "slug": "qwen3-5-397b-reasoning",
        "canonicalModelKey": "qwen3-5-397b-reasoning",
        "model": "Qwen3.5 397B (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 26,
        "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
        "score": 80.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 38,
        "slug": "grok-4-1-fast",
        "canonicalModelKey": "grok-4-1-fast",
        "model": "Grok 4.1 Fast",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 41,
        "url": "https://benchlm.ai/models/grok-4-1-fast",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
        "score": 79.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 39,
        "slug": "gemini-3-pro",
        "canonicalModelKey": "gemini-3-pro",
        "model": "Gemini 3 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 80,
        "rankingEligible": true,
        "overallRank": 22,
        "url": "https://benchlm.ai/models/gemini-3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
        "score": 79.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 40,
        "slug": "qwen3-235b-2507",
        "canonicalModelKey": "qwen3-235b-2507",
        "model": "Qwen3 235B 2507",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 95,
        "url": "https://benchlm.ai/models/qwen3-235b-2507",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
        "score": 79.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 41,
        "slug": "gpt-4-1-mini",
        "canonicalModelKey": "gpt-4-1-mini",
        "model": "GPT-4.1 mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 77,
        "url": "https://benchlm.ai/models/gpt-4-1-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-mini.md",
        "score": 78.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 42,
        "slug": "gemini-3-5-flash",
        "canonicalModelKey": "gemini-3-5-flash",
        "model": "Gemini 3.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 13,
        "url": "https://benchlm.ai/models/gemini-3-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-5-flash.md",
        "score": 78,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 20,
        "scoreConfidence": 3
      },
      {
        "rank": 43,
        "slug": "o1-preview",
        "canonicalModelKey": "o1-preview",
        "model": "o1-preview",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 20,
        "url": "https://benchlm.ai/models/o1-preview",
        "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
        "score": 76.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 44,
        "slug": "gpt-5-medium",
        "canonicalModelKey": "gpt-5-medium",
        "model": "GPT-5 (medium)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 70,
        "rankingEligible": true,
        "overallRank": 37,
        "url": "https://benchlm.ai/models/gpt-5-medium",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
        "score": 76.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 45,
        "slug": "claude-4-1-opus-thinking",
        "canonicalModelKey": "claude-4-1-opus-thinking",
        "model": "Claude 4.1 Opus Thinking",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 80,
        "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
        "score": 76.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 46,
        "slug": "minimax-m2-7",
        "canonicalModelKey": "minimax-m2-7",
        "model": "MiniMax M2.7",
        "creator": "MiniMax",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 53,
        "rankingEligible": true,
        "overallRank": 65,
        "url": "https://benchlm.ai/models/minimax-m2-7",
        "markdownUrl": "https://benchlm.ai/md/models/minimax-m2-7.md",
        "score": 76.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 2
      },
      {
        "rank": 47,
        "slug": "qwen3-235b-2507-reasoning",
        "canonicalModelKey": "qwen3-235b-2507-reasoning",
        "model": "Qwen3 235B 2507 (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 76,
        "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
        "score": 76.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 48,
        "slug": "gpt-5-1",
        "canonicalModelKey": "gpt-5-1",
        "model": "GPT-5.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 27,
        "url": "https://benchlm.ai/models/gpt-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
        "score": 75.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 49,
        "slug": "gpt-4-1",
        "canonicalModelKey": "gpt-4-1",
        "model": "GPT-4.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 56,
        "url": "https://benchlm.ai/models/gpt-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1.md",
        "score": 74.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 50,
        "slug": "glm-4-7",
        "canonicalModelKey": "glm-4-7",
        "model": "GLM-4.7",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 40,
        "url": "https://benchlm.ai/models/glm-4-7",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
        "score": 72.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 51,
        "slug": "gpt-5-4-mini",
        "canonicalModelKey": "gpt-5-4-mini",
        "model": "GPT-5.4 mini",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 68,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/gpt-5-4-mini",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4-mini.md",
        "score": 72.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 9,
        "scoreConfidence": 2
      },
      {
        "rank": 52,
        "slug": "nemotron-3-nano-omni-30b-a3b",
        "canonicalModelKey": "nemotron-3-nano-omni-30b-a3b",
        "model": "Nemotron 3 Nano Omni 30B A3B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 48,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/nemotron-3-nano-omni-30b-a3b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-omni-30b-a3b.md",
        "score": 71.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 15,
        "scoreConfidence": 3
      },
      {
        "rank": 53,
        "slug": "claude-haiku-4-5",
        "canonicalModelKey": "claude-haiku-4-5",
        "model": "Claude Haiku 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 60,
        "url": "https://benchlm.ai/models/claude-haiku-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
        "score": 69.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 54,
        "slug": "deepseek-coder-2-0",
        "canonicalModelKey": "deepseek-coder-2-0",
        "model": "DeepSeek Coder 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 66,
        "url": "https://benchlm.ai/models/deepseek-coder-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
        "score": 69.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 55,
        "slug": "llama-3-1-405b",
        "canonicalModelKey": "llama-3-1-405b",
        "model": "Llama 3.1 405B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 83,
        "url": "https://benchlm.ai/models/llama-3-1-405b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
        "score": 69.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 56,
        "slug": "mistral-large-3",
        "canonicalModelKey": "mistral-large-3",
        "model": "Mistral Large 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 48,
        "rankingEligible": true,
        "overallRank": 74,
        "url": "https://benchlm.ai/models/mistral-large-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
        "score": 67,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 57,
        "slug": "gemini-3-flash",
        "canonicalModelKey": "gemini-3-flash",
        "model": "Gemini 3 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 63,
        "url": "https://benchlm.ai/models/gemini-3-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
        "score": 66.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 58,
        "slug": "o3",
        "canonicalModelKey": "o3",
        "model": "o3",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 61,
        "url": "https://benchlm.ai/models/o3",
        "markdownUrl": "https://benchlm.ai/md/models/o3.md",
        "score": 65.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 59,
        "slug": "deepseek-llm-2-0",
        "canonicalModelKey": "deepseek-llm-2-0",
        "model": "DeepSeek LLM 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 68,
        "url": "https://benchlm.ai/models/deepseek-llm-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
        "score": 65.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 60,
        "slug": "qwen2-5-72b",
        "canonicalModelKey": "qwen2-5-72b",
        "model": "Qwen2.5-72B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 72,
        "url": "https://benchlm.ai/models/qwen2-5-72b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
        "score": 65.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 61,
        "slug": "sarvam-105b",
        "canonicalModelKey": "sarvam-105b",
        "model": "Sarvam 105B",
        "creator": "Sarvam",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 86,
        "url": "https://benchlm.ai/models/sarvam-105b",
        "markdownUrl": "https://benchlm.ai/md/models/sarvam-105b.md",
        "score": 65.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 62,
        "slug": "lfm2-5-8b-a1b",
        "canonicalModelKey": "lfm2-5-8b-a1b",
        "model": "LFM2.5-8B-A1B",
        "creator": "LiquidAI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/lfm2-5-8b-a1b",
        "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-8b-a1b.md",
        "score": 63.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 63,
        "slug": "claude-opus-4-5",
        "canonicalModelKey": "claude-opus-4-5",
        "model": "Claude Opus 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 28,
        "url": "https://benchlm.ai/models/claude-opus-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
        "score": 63.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 41,
        "scoreConfidence": 4
      },
      {
        "rank": 64,
        "slug": "mimo-v2-flash",
        "canonicalModelKey": "mimo-v2-flash",
        "model": "MiMo-V2-Flash",
        "creator": "Xiaomi",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 59,
        "rankingEligible": true,
        "overallRank": 54,
        "url": "https://benchlm.ai/models/mimo-v2-flash",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
        "score": 62.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 65,
        "slug": "qwen2-5-1m",
        "canonicalModelKey": "qwen2-5-1m",
        "model": "Qwen2.5-1M",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 69,
        "url": "https://benchlm.ai/models/qwen2-5-1m",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
        "score": 62.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 66,
        "slug": "nemotron-3-super-100b",
        "canonicalModelKey": "nemotron-3-super-100b",
        "model": "Nemotron 3 Super 100B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 78,
        "url": "https://benchlm.ai/models/nemotron-3-super-100b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
        "score": 62.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 67,
        "slug": "deepseek-v3-2-thinking",
        "canonicalModelKey": "deepseek-v3-2-thinking",
        "model": "DeepSeek V3.2 (Thinking)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 60,
        "rankingEligible": true,
        "overallRank": 53,
        "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
        "score": 62.1,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 68,
        "slug": "deepseek-v3-2",
        "canonicalModelKey": "deepseek-v3-2",
        "model": "DeepSeek V3.2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 59,
        "url": "https://benchlm.ai/models/deepseek-v3-2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
        "score": 61.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 69,
        "slug": "deepseek-v3",
        "canonicalModelKey": "deepseek-v3",
        "model": "DeepSeek V3",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 90,
        "url": "https://benchlm.ai/models/deepseek-v3",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3.md",
        "score": 61.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 70,
        "slug": "gpt-4-1-nano",
        "canonicalModelKey": "gpt-4-1-nano",
        "model": "GPT-4.1 nano",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 27,
        "rankingEligible": true,
        "overallRank": 101,
        "url": "https://benchlm.ai/models/gpt-4-1-nano",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-1-nano.md",
        "score": 59.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 71,
        "slug": "gemini-2-5-pro",
        "canonicalModelKey": "gemini-2-5-pro",
        "model": "Gemini 2.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 49,
        "url": "https://benchlm.ai/models/gemini-2-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
        "score": 59,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 72,
        "slug": "claude-4-1-opus",
        "canonicalModelKey": "claude-4-1-opus",
        "model": "Claude 4.1 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 67,
        "url": "https://benchlm.ai/models/claude-4-1-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
        "score": 58.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 73,
        "slug": "claude-4-sonnet",
        "canonicalModelKey": "claude-4-sonnet",
        "model": "Claude 4 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 70,
        "url": "https://benchlm.ai/models/claude-4-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
        "score": 58.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 74,
        "slug": "deepseekmath-v2",
        "canonicalModelKey": "deepseekmath-v2",
        "model": "DeepSeekMath V2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 73,
        "url": "https://benchlm.ai/models/deepseekmath-v2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
        "score": 58.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 75,
        "slug": "o4-mini-high",
        "canonicalModelKey": "o4-mini-high",
        "model": "o4-mini (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 79,
        "url": "https://benchlm.ai/models/o4-mini-high",
        "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
        "score": 58.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 76,
        "slug": "claude-3-5-sonnet",
        "canonicalModelKey": "claude-3-5-sonnet",
        "model": "Claude 3.5 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 84,
        "url": "https://benchlm.ai/models/claude-3-5-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
        "score": 58.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 77,
        "slug": "grok-4",
        "canonicalModelKey": "grok-4",
        "model": "Grok 4",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 50,
        "url": "https://benchlm.ai/models/grok-4",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
        "score": 54.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 78,
        "slug": "o3-pro",
        "canonicalModelKey": "o3-pro",
        "model": "o3-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 57,
        "url": "https://benchlm.ai/models/o3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
        "score": 54.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 79,
        "slug": "gpt-4o",
        "canonicalModelKey": "gpt-4o",
        "model": "GPT-4o",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": true,
        "overallRank": 81,
        "url": "https://benchlm.ai/models/gpt-4o",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
        "score": 54.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 80,
        "slug": "deepseek-r1",
        "canonicalModelKey": "deepseek-r1",
        "model": "DeepSeek-R1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 94,
        "url": "https://benchlm.ai/models/deepseek-r1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
        "score": 54.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 81,
        "slug": "ternary-bonsai-8b",
        "canonicalModelKey": "ternary-bonsai-8b",
        "model": "Ternary Bonsai 8B",
        "creator": "Prism ML",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "64K",
        "contextWindowTokens": 64000,
        "displayScore": 44,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/ternary-bonsai-8b",
        "markdownUrl": "https://benchlm.ai/md/models/ternary-bonsai-8b.md",
        "score": 54.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 82,
        "slug": "mimo-v2-pro",
        "canonicalModelKey": "mimo-v2-pro",
        "model": "MiMo-V2-Pro",
        "creator": "Xiaomi",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 84,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/mimo-v2-pro",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-pro.md",
        "score": 52.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 83,
        "slug": "mistral-large-2",
        "canonicalModelKey": "mistral-large-2",
        "model": "Mistral Large 2",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 38,
        "rankingEligible": true,
        "overallRank": 87,
        "url": "https://benchlm.ai/models/mistral-large-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
        "score": 50.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 84,
        "slug": "mistral-small-4",
        "canonicalModelKey": "mistral-small-4",
        "model": "Mistral Small 4",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 41,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/mistral-small-4",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-small-4.md",
        "score": 48.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 85,
        "slug": "gpt-4-turbo",
        "canonicalModelKey": "gpt-4-turbo",
        "model": "GPT-4 Turbo",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 107,
        "url": "https://benchlm.ai/models/gpt-4-turbo",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
        "score": 47.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 86,
        "slug": "z-1",
        "canonicalModelKey": "z-1",
        "model": "Z-1",
        "creator": "Z",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 109,
        "url": "https://benchlm.ai/models/z-1",
        "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
        "score": 47.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 87,
        "slug": "bonsai-8b",
        "canonicalModelKey": "bonsai-8b",
        "model": "1-bit Bonsai 8B",
        "creator": "Prism ML",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "64K",
        "contextWindowTokens": 64000,
        "displayScore": 23,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/bonsai-8b",
        "markdownUrl": "https://benchlm.ai/md/models/bonsai-8b.md",
        "score": 46.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 88,
        "slug": "qwen3-5-flash",
        "canonicalModelKey": "qwen3-5-flash",
        "model": "Qwen3.5 Flash",
        "creator": "Alibaba",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 28,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/qwen3-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-flash.md",
        "score": 46.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 89,
        "slug": "zaya1-8b",
        "canonicalModelKey": "zaya1-8b",
        "model": "ZAYA1-8B",
        "creator": "Zyphra",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "131K",
        "contextWindowTokens": 131000,
        "displayScore": 57,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/zaya1-8b",
        "markdownUrl": "https://benchlm.ai/md/models/zaya1-8b.md",
        "score": 44.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 1
      },
      {
        "rank": 90,
        "slug": "gemini-3-1-flash-lite",
        "canonicalModelKey": "gemini-3-1-flash-lite",
        "model": "Gemini 3.1 Flash-Lite",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 47,
        "rankingEligible": true,
        "overallRank": 75,
        "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
        "score": 44,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 91,
        "slug": "grok-code-fast-1",
        "canonicalModelKey": "grok-code-fast-1",
        "model": "Grok Code Fast 1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 85,
        "url": "https://benchlm.ai/models/grok-code-fast-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
        "score": 44,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 92,
        "slug": "nemotron-4-15b",
        "canonicalModelKey": "nemotron-4-15b",
        "model": "Nemotron-4 15B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 113,
        "url": "https://benchlm.ai/models/nemotron-4-15b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
        "score": 44,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 93,
        "slug": "granite-4-0-1b",
        "canonicalModelKey": "granite-4-0-1b",
        "model": "Granite-4.0-1B",
        "creator": "IBM",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/granite-4-0-1b",
        "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-1b.md",
        "score": 42.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 94,
        "slug": "gemini-2-5-flash",
        "canonicalModelKey": "gemini-2-5-flash",
        "model": "Gemini 2.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 37,
        "rankingEligible": true,
        "overallRank": 88,
        "url": "https://benchlm.ai/models/gemini-2-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
        "score": 41.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 95,
        "slug": "nemotron-3-nano-30b",
        "canonicalModelKey": "nemotron-3-nano-30b",
        "model": "Nemotron 3 Nano 30B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 104,
        "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
        "score": 40.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 96,
        "slug": "mistral-8x7b",
        "canonicalModelKey": "mistral-8x7b",
        "model": "Mistral 8x7B",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 110,
        "url": "https://benchlm.ai/models/mistral-8x7b",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
        "score": 40.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 97,
        "slug": "nemotron-ultra-253b",
        "canonicalModelKey": "nemotron-ultra-253b",
        "model": "Nemotron Ultra 253B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 115,
        "url": "https://benchlm.ai/models/nemotron-ultra-253b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
        "score": 40.3,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 98,
        "slug": "gpt-oss-120b",
        "canonicalModelKey": "gpt-oss-120b",
        "model": "GPT-OSS 120B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 91,
        "url": "https://benchlm.ai/models/gpt-oss-120b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
        "score": 38.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 99,
        "slug": "granite-4-0-h-1b",
        "canonicalModelKey": "granite-4-0-h-1b",
        "model": "Granite-4.0-H-1B",
        "creator": "IBM",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 9,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/granite-4-0-h-1b",
        "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-h-1b.md",
        "score": 38,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 100,
        "slug": "gemini-1-5-pro",
        "canonicalModelKey": "gemini-1-5-pro",
        "model": "Gemini 1.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 35,
        "rankingEligible": true,
        "overallRank": 89,
        "url": "https://benchlm.ai/models/gemini-1-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
        "score": 36.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 101,
        "slug": "claude-3-opus",
        "canonicalModelKey": "claude-3-opus",
        "model": "Claude 3 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 92,
        "url": "https://benchlm.ai/models/claude-3-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
        "score": 36.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 102,
        "slug": "llama-3-70b",
        "canonicalModelKey": "llama-3-70b",
        "model": "Llama 3 70B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 105,
        "url": "https://benchlm.ai/models/llama-3-70b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
        "score": 36.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 103,
        "slug": "gemini-1-0-pro",
        "canonicalModelKey": "gemini-1-0-pro",
        "model": "Gemini 1.0 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 108,
        "url": "https://benchlm.ai/models/gemini-1-0-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
        "score": 36.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 104,
        "slug": "moonshot-v1",
        "canonicalModelKey": "moonshot-v1",
        "model": "Moonshot v1",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 114,
        "url": "https://benchlm.ai/models/moonshot-v1",
        "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
        "score": 36.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 105,
        "slug": "lfm2-5-350m",
        "canonicalModelKey": "lfm2-5-350m",
        "model": "LFM2.5-350M",
        "creator": "LiquidAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/lfm2-5-350m",
        "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-350m.md",
        "score": 36.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 106,
        "slug": "mellum2-12b-a2-5b-thinking",
        "canonicalModelKey": "mellum2-12b-a2-5b-thinking",
        "model": "Mellum2-12B-A2.5B-Thinking",
        "creator": "JetBrains",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 59,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/mellum2-12b-a2-5b-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/mellum2-12b-a2-5b-thinking.md",
        "score": 34.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 107,
        "slug": "hy3-preview",
        "canonicalModelKey": "hy3-preview",
        "model": "Hy3 Preview",
        "creator": "Tencent",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 58,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/hy3-preview",
        "markdownUrl": "https://benchlm.ai/md/models/hy3-preview.md",
        "score": 33.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 108,
        "slug": "claude-3-haiku",
        "canonicalModelKey": "claude-3-haiku",
        "model": "Claude 3 Haiku",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 111,
        "url": "https://benchlm.ai/models/claude-3-haiku",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
        "score": 33,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 109,
        "slug": "mellum2-12b-a2-5b-instruct",
        "canonicalModelKey": "mellum2-12b-a2-5b-instruct",
        "model": "Mellum2-12B-A2.5B-Instruct",
        "creator": "JetBrains",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 27,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/mellum2-12b-a2-5b-instruct",
        "markdownUrl": "https://benchlm.ai/md/models/mellum2-12b-a2-5b-instruct.md",
        "score": 32.2,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 110,
        "slug": "minicpm5-1b",
        "canonicalModelKey": "minicpm5-1b",
        "model": "MiniCPM5-1B",
        "creator": "OpenBMB",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "131K",
        "contextWindowTokens": 131000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 93,
        "url": "https://benchlm.ai/models/minicpm5-1b",
        "markdownUrl": "https://benchlm.ai/md/models/minicpm5-1b.md",
        "score": 31.9,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 1
      },
      {
        "rank": 111,
        "slug": "llama-4-behemoth",
        "canonicalModelKey": "llama-4-behemoth",
        "model": "Llama 4 Behemoth",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": true,
        "overallRank": 120,
        "url": "https://benchlm.ai/models/llama-4-behemoth",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
        "score": 26.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 112,
        "slug": "llama-4-maverick",
        "canonicalModelKey": "llama-4-maverick",
        "model": "Llama 4 Maverick",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 17,
        "rankingEligible": true,
        "overallRank": 117,
        "url": "https://benchlm.ai/models/llama-4-maverick",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
        "score": 22.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 113,
        "slug": "llama-4-scout",
        "canonicalModelKey": "llama-4-scout",
        "model": "Llama 4 Scout",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "10M",
        "contextWindowTokens": 10000000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 103,
        "url": "https://benchlm.ai/models/llama-4-scout",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
        "score": 18.8,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 114,
        "slug": "ternary-bonsai-4b",
        "canonicalModelKey": "ternary-bonsai-4b",
        "model": "Ternary Bonsai 4B",
        "creator": "Prism ML",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/ternary-bonsai-4b",
        "markdownUrl": "https://benchlm.ai/md/models/ternary-bonsai-4b.md",
        "score": 18.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 115,
        "slug": "ling-2-6-flash",
        "canonicalModelKey": "ling-2-6-flash",
        "model": "Ling 2.6 Flash",
        "creator": "InclusionAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "262K",
        "contextWindowTokens": 262000,
        "displayScore": 36,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/ling-2-6-flash",
        "markdownUrl": "https://benchlm.ai/md/models/ling-2-6-flash.md",
        "score": 14.6,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 116,
        "slug": "deepseek-v3-1-reasoning",
        "canonicalModelKey": "deepseek-v3-1-reasoning",
        "model": "DeepSeek V3.1 (Reasoning)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 29,
        "rankingEligible": true,
        "overallRank": 98,
        "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
        "score": 14.1,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 117,
        "slug": "ternary-bonsai-1-7b",
        "canonicalModelKey": "ternary-bonsai-1-7b",
        "model": "Ternary Bonsai 1.7B",
        "creator": "Prism ML",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 28,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/ternary-bonsai-1-7b",
        "markdownUrl": "https://benchlm.ai/md/models/ternary-bonsai-1-7b.md",
        "score": 11.4,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 118,
        "slug": "bonsai-4b",
        "canonicalModelKey": "bonsai-4b",
        "model": "1-bit Bonsai 4B",
        "creator": "Prism ML",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 17,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/bonsai-4b",
        "markdownUrl": "https://benchlm.ai/md/models/bonsai-4b.md",
        "score": 9.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 119,
        "slug": "glm-4-5",
        "canonicalModelKey": "glm-4-5",
        "model": "GLM-4.5",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 102,
        "url": "https://benchlm.ai/models/glm-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
        "score": 8.5,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 120,
        "slug": "glm-4-5-air",
        "canonicalModelKey": "glm-4-5-air",
        "model": "GLM-4.5-Air",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 18,
        "rankingEligible": true,
        "overallRank": 116,
        "url": "https://benchlm.ai/models/glm-4-5-air",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
        "score": 5.4,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 121,
        "slug": "mistral-7b-v0-3",
        "canonicalModelKey": "mistral-7b-v0-3",
        "model": "Mistral 7B v0.3",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 4,
        "rankingEligible": true,
        "overallRank": 122,
        "url": "https://benchlm.ai/models/mistral-7b-v0-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
        "score": 3.7,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 122,
        "slug": "grok-3-beta",
        "canonicalModelKey": "grok-3-beta",
        "model": "Grok 3 [Beta]",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 30,
        "rankingEligible": true,
        "overallRank": 97,
        "url": "https://benchlm.ai/models/grok-3-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 123,
        "slug": "deepseek-v3-1",
        "canonicalModelKey": "deepseek-v3-1",
        "model": "DeepSeek V3.1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 106,
        "url": "https://benchlm.ai/models/deepseek-v3-1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 124,
        "slug": "gemma-3-27b",
        "canonicalModelKey": "gemma-3-27b",
        "model": "Gemma 3 27B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 118,
        "url": "https://benchlm.ai/models/gemma-3-27b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 125,
        "slug": "gpt-oss-20b",
        "canonicalModelKey": "gpt-oss-20b",
        "model": "GPT-OSS 20B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 119,
        "url": "https://benchlm.ai/models/gpt-oss-20b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 126,
        "slug": "nova-pro",
        "canonicalModelKey": "nova-pro",
        "model": "Nova Pro",
        "creator": "Amazon",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 10,
        "rankingEligible": true,
        "overallRank": 121,
        "url": "https://benchlm.ai/models/nova-pro",
        "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 127,
        "slug": "mistral-8x7b-v0-2",
        "canonicalModelKey": "mistral-8x7b-v0-2",
        "model": "Mistral 8x7B v0.2",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 1,
        "rankingEligible": true,
        "overallRank": 123,
        "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 128,
        "slug": "bonsai-1-7b",
        "canonicalModelKey": "bonsai-1-7b",
        "model": "1-bit Bonsai 1.7B",
        "creator": "Prism ML",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 24,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/bonsai-1-7b",
        "markdownUrl": "https://benchlm.ai/md/models/bonsai-1-7b.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 129,
        "slug": "trinity-large-thinking",
        "canonicalModelKey": "trinity-large-thinking",
        "model": "Trinity-Large-Thinking",
        "creator": "Arcee AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "512K",
        "contextWindowTokens": 512000,
        "displayScore": 12,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/trinity-large-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/trinity-large-thinking.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 130,
        "slug": "lfm2-5-vl-450m",
        "canonicalModelKey": "lfm2-5-vl-450m",
        "model": "LFM2.5-VL-450M",
        "creator": "LiquidAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 0,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/lfm2-5-vl-450m",
        "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-vl-450m.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 131,
        "slug": "granite-4-0-350m",
        "canonicalModelKey": "granite-4-0-350m",
        "model": "Granite-4.0-350M",
        "creator": "IBM",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 0,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/granite-4-0-350m",
        "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-350m.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 132,
        "slug": "granite-4-0-h-350m",
        "canonicalModelKey": "granite-4-0-h-350m",
        "model": "Granite-4.0-H-350M",
        "creator": "IBM",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 0,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/granite-4-0-h-350m",
        "markdownUrl": "https://benchlm.ai/md/models/granite-4-0-h-350m.md",
        "score": 0,
        "category": "instructionFollowing",
        "categoryLabel": "Instruction Following",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      }
    ],
    "math": [
      {
        "rank": 1,
        "slug": "gpt-5-3-codex",
        "canonicalModelKey": "gpt-5-3-codex",
        "model": "GPT-5.3 Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 85,
        "rankingEligible": true,
        "overallRank": 15,
        "url": "https://benchlm.ai/models/gpt-5-3-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-3-codex.md",
        "score": 100,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 2,
        "slug": "grok-4-1",
        "canonicalModelKey": "grok-4-1",
        "model": "Grok 4.1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 9,
        "url": "https://benchlm.ai/models/grok-4-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1.md",
        "score": 99.4,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 3,
        "slug": "gpt-5-2-codex",
        "canonicalModelKey": "gpt-5-2-codex",
        "model": "GPT-5.2-Codex",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 30,
        "url": "https://benchlm.ai/models/gpt-5-2-codex",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2-codex.md",
        "score": 97.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 4,
        "slug": "gpt-5-1-codex-max",
        "canonicalModelKey": "gpt-5-1-codex-max",
        "model": "GPT-5.1-Codex-Max",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 32,
        "url": "https://benchlm.ai/models/gpt-5-1-codex-max",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1-codex-max.md",
        "score": 97.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 5,
        "slug": "claude-opus-4-5",
        "canonicalModelKey": "claude-opus-4-5",
        "model": "Claude Opus 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 28,
        "url": "https://benchlm.ai/models/claude-opus-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-5.md",
        "score": 94.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 41,
        "scoreConfidence": 4
      },
      {
        "rank": 6,
        "slug": "gemini-3-pro-deep-think",
        "canonicalModelKey": "gemini-3-pro-deep-think",
        "model": "Gemini 3 Pro Deep Think",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 89,
        "rankingEligible": true,
        "overallRank": 8,
        "url": "https://benchlm.ai/models/gemini-3-pro-deep-think",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro-deep-think.md",
        "score": 94.8,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 7,
        "slug": "gpt-5-4",
        "canonicalModelKey": "gpt-5-4",
        "model": "GPT-5.4",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "1.05M",
        "contextWindowTokens": 1050000,
        "displayScore": 88,
        "rankingEligible": true,
        "overallRank": 10,
        "url": "https://benchlm.ai/models/gpt-5-4",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-4.md",
        "score": 94.4,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 26,
        "scoreConfidence": 4
      },
      {
        "rank": 8,
        "slug": "o1-preview",
        "canonicalModelKey": "o1-preview",
        "model": "o1-preview",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 20,
        "url": "https://benchlm.ai/models/o1-preview",
        "markdownUrl": "https://benchlm.ai/md/models/o1-preview.md",
        "score": 94.1,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 9,
        "slug": "grok-4-1-fast",
        "canonicalModelKey": "grok-4-1-fast",
        "model": "Grok 4.1 Fast",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 41,
        "url": "https://benchlm.ai/models/grok-4-1-fast",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4-1-fast.md",
        "score": 93.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 10,
        "slug": "qwen3-5-397b-reasoning",
        "canonicalModelKey": "qwen3-5-397b-reasoning",
        "model": "Qwen3.5 397B (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 26,
        "url": "https://benchlm.ai/models/qwen3-5-397b-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b-reasoning.md",
        "score": 92.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 11,
        "slug": "glm-5-reasoning",
        "canonicalModelKey": "glm-5-reasoning",
        "model": "GLM-5 (Reasoning)",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 79,
        "rankingEligible": true,
        "overallRank": 24,
        "url": "https://benchlm.ai/models/glm-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-reasoning.md",
        "score": 92.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 12,
        "slug": "gpt-5-medium",
        "canonicalModelKey": "gpt-5-medium",
        "model": "GPT-5 (medium)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 70,
        "rankingEligible": true,
        "overallRank": 37,
        "url": "https://benchlm.ai/models/gpt-5-medium",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-medium.md",
        "score": 91.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 13,
        "slug": "glm-5",
        "canonicalModelKey": "glm-5",
        "model": "GLM-5",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 67,
        "rankingEligible": true,
        "overallRank": 42,
        "url": "https://benchlm.ai/models/glm-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5.md",
        "score": 91.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 33,
        "scoreConfidence": 4
      },
      {
        "rank": 14,
        "slug": "sarvam-105b",
        "canonicalModelKey": "sarvam-105b",
        "model": "Sarvam 105B",
        "creator": "Sarvam",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 86,
        "url": "https://benchlm.ai/models/sarvam-105b",
        "markdownUrl": "https://benchlm.ai/md/models/sarvam-105b.md",
        "score": 90.4,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 15,
        "slug": "glm-5-1",
        "canonicalModelKey": "glm-5-1",
        "model": "GLM-5.1",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "203K",
        "contextWindowTokens": 203000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 17,
        "url": "https://benchlm.ai/models/glm-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/glm-5-1.md",
        "score": 89.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 16,
        "scoreConfidence": 3
      },
      {
        "rank": 16,
        "slug": "claude-sonnet-4-5",
        "canonicalModelKey": "claude-sonnet-4-5",
        "model": "Claude Sonnet 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 64,
        "rankingEligible": true,
        "overallRank": 46,
        "url": "https://benchlm.ai/models/claude-sonnet-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-5.md",
        "score": 87.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 17,
        "slug": "o3-pro",
        "canonicalModelKey": "o3-pro",
        "model": "o3-pro",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 57,
        "rankingEligible": true,
        "overallRank": 57,
        "url": "https://benchlm.ai/models/o3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/o3-pro.md",
        "score": 86.4,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 18,
        "slug": "claude-opus-4-6",
        "canonicalModelKey": "claude-opus-4-6",
        "model": "Claude Opus 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 86,
        "rankingEligible": true,
        "overallRank": 12,
        "url": "https://benchlm.ai/models/claude-opus-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-opus-4-6.md",
        "score": 86.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 27,
        "scoreConfidence": 4
      },
      {
        "rank": 19,
        "slug": "o3",
        "canonicalModelKey": "o3",
        "model": "o3",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 61,
        "url": "https://benchlm.ai/models/o3",
        "markdownUrl": "https://benchlm.ai/md/models/o3.md",
        "score": 83.4,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 20,
        "slug": "mimo-v2-flash",
        "canonicalModelKey": "mimo-v2-flash",
        "model": "MiMo-V2-Flash",
        "creator": "Xiaomi",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 59,
        "rankingEligible": true,
        "overallRank": 54,
        "url": "https://benchlm.ai/models/mimo-v2-flash",
        "markdownUrl": "https://benchlm.ai/md/models/mimo-v2-flash.md",
        "score": 82.1,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 21,
        "slug": "sarvam-30b",
        "canonicalModelKey": "sarvam-30b",
        "model": "Sarvam 30B",
        "creator": "Sarvam",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "64K",
        "contextWindowTokens": 64000,
        "displayScore": 40,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/sarvam-30b",
        "markdownUrl": "https://benchlm.ai/md/models/sarvam-30b.md",
        "score": 81.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 22,
        "slug": "gpt-5-2",
        "canonicalModelKey": "gpt-5-2",
        "model": "GPT-5.2",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "400K",
        "contextWindowTokens": 400000,
        "displayScore": 78,
        "rankingEligible": true,
        "overallRank": 25,
        "url": "https://benchlm.ai/models/gpt-5-2",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-2.md",
        "score": 80.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 11,
        "scoreConfidence": 2
      },
      {
        "rank": 23,
        "slug": "gemini-3-pro",
        "canonicalModelKey": "gemini-3-pro",
        "model": "Gemini 3 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 80,
        "rankingEligible": true,
        "overallRank": 22,
        "url": "https://benchlm.ai/models/gemini-3-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-pro.md",
        "score": 80.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 24,
        "slug": "grok-4",
        "canonicalModelKey": "grok-4",
        "model": "Grok 4",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 50,
        "url": "https://benchlm.ai/models/grok-4",
        "markdownUrl": "https://benchlm.ai/md/models/grok-4.md",
        "score": 80,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 25,
        "slug": "glm-4-7",
        "canonicalModelKey": "glm-4-7",
        "model": "GLM-4.7",
        "creator": "Z.AI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 68,
        "rankingEligible": true,
        "overallRank": 40,
        "url": "https://benchlm.ai/models/glm-4-7",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-7.md",
        "score": 78.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 7,
        "scoreConfidence": 1
      },
      {
        "rank": 26,
        "slug": "qwen2-5-1m",
        "canonicalModelKey": "qwen2-5-1m",
        "model": "Qwen2.5-1M",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 69,
        "url": "https://benchlm.ai/models/qwen2-5-1m",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-1m.md",
        "score": 76.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 27,
        "slug": "qwen2-5-72b",
        "canonicalModelKey": "qwen2-5-72b",
        "model": "Qwen2.5-72B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 72,
        "url": "https://benchlm.ai/models/qwen2-5-72b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen2-5-72b.md",
        "score": 76.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 28,
        "slug": "claude-sonnet-4-6",
        "canonicalModelKey": "claude-sonnet-4-6",
        "model": "Claude Sonnet 4.6",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 82,
        "rankingEligible": true,
        "overallRank": 18,
        "url": "https://benchlm.ai/models/claude-sonnet-4-6",
        "markdownUrl": "https://benchlm.ai/md/models/claude-sonnet-4-6.md",
        "score": 75.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 3
      },
      {
        "rank": 29,
        "slug": "deepseekmath-v2",
        "canonicalModelKey": "deepseekmath-v2",
        "model": "DeepSeekMath V2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 49,
        "rankingEligible": true,
        "overallRank": 73,
        "url": "https://benchlm.ai/models/deepseekmath-v2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseekmath-v2.md",
        "score": 74.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 30,
        "slug": "gemini-2-5-pro",
        "canonicalModelKey": "gemini-2-5-pro",
        "model": "Gemini 2.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 49,
        "url": "https://benchlm.ai/models/gemini-2-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-pro.md",
        "score": 73.5,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 3,
        "scoreConfidence": 1
      },
      {
        "rank": 31,
        "slug": "qwen3-5-397b",
        "canonicalModelKey": "qwen3-5-397b",
        "model": "Qwen3.5 397B",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 62,
        "rankingEligible": true,
        "overallRank": 51,
        "url": "https://benchlm.ai/models/qwen3-5-397b",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-5-397b.md",
        "score": 73.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 36,
        "scoreConfidence": 4
      },
      {
        "rank": 32,
        "slug": "deepseek-coder-2-0",
        "canonicalModelKey": "deepseek-coder-2-0",
        "model": "DeepSeek Coder 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 66,
        "url": "https://benchlm.ai/models/deepseek-coder-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-coder-2-0.md",
        "score": 70.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 33,
        "slug": "deepseek-llm-2-0",
        "canonicalModelKey": "deepseek-llm-2-0",
        "model": "DeepSeek LLM 2.0",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 68,
        "url": "https://benchlm.ai/models/deepseek-llm-2-0",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-llm-2-0.md",
        "score": 70.8,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 34,
        "slug": "deepseek-v3-2",
        "canonicalModelKey": "deepseek-v3-2",
        "model": "DeepSeek V3.2",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 59,
        "url": "https://benchlm.ai/models/deepseek-v3-2",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2.md",
        "score": 70.5,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 35,
        "slug": "gpt-5-high",
        "canonicalModelKey": "gpt-5-high",
        "model": "GPT-5 (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 76,
        "rankingEligible": true,
        "overallRank": 29,
        "url": "https://benchlm.ai/models/gpt-5-high",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-high.md",
        "score": 70.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 36,
        "slug": "gpt-5-1",
        "canonicalModelKey": "gpt-5-1",
        "model": "GPT-5.1",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 77,
        "rankingEligible": true,
        "overallRank": 27,
        "url": "https://benchlm.ai/models/gpt-5-1",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-5-1.md",
        "score": 68.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 37,
        "slug": "gemini-3-1-pro",
        "canonicalModelKey": "gemini-3-1-pro",
        "model": "Gemini 3.1 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 91,
        "rankingEligible": true,
        "overallRank": 4,
        "url": "https://benchlm.ai/models/gemini-3-1-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-pro.md",
        "score": 67.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 17,
        "scoreConfidence": 3
      },
      {
        "rank": 38,
        "slug": "kimi-k2-5-reasoning",
        "canonicalModelKey": "kimi-k2-5-reasoning",
        "model": "Kimi K2.5 (Reasoning)",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 75,
        "rankingEligible": true,
        "overallRank": 31,
        "url": "https://benchlm.ai/models/kimi-k2-5-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5-reasoning.md",
        "score": 67.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 8,
        "scoreConfidence": 2
      },
      {
        "rank": 39,
        "slug": "claude-4-1-opus",
        "canonicalModelKey": "claude-4-1-opus",
        "model": "Claude 4.1 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 51,
        "rankingEligible": true,
        "overallRank": 67,
        "url": "https://benchlm.ai/models/claude-4-1-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus.md",
        "score": 64.8,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 40,
        "slug": "mistral-large-3",
        "canonicalModelKey": "mistral-large-3",
        "model": "Mistral Large 3",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 48,
        "rankingEligible": true,
        "overallRank": 74,
        "url": "https://benchlm.ai/models/mistral-large-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-3.md",
        "score": 64.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 41,
        "slug": "claude-4-sonnet",
        "canonicalModelKey": "claude-4-sonnet",
        "model": "Claude 4 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 50,
        "rankingEligible": true,
        "overallRank": 70,
        "url": "https://benchlm.ai/models/claude-4-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-sonnet.md",
        "score": 61.1,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 42,
        "slug": "llama-3-1-405b",
        "canonicalModelKey": "llama-3-1-405b",
        "model": "Llama 3.1 405B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 83,
        "url": "https://benchlm.ai/models/llama-3-1-405b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-1-405b.md",
        "score": 58,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 43,
        "slug": "kimi-k2-5",
        "canonicalModelKey": "kimi-k2-5",
        "model": "Kimi K2.5",
        "creator": "Moonshot AI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 63,
        "rankingEligible": true,
        "overallRank": 47,
        "url": "https://benchlm.ai/models/kimi-k2-5",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2-5.md",
        "score": 56.8,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 39,
        "scoreConfidence": 4
      },
      {
        "rank": 44,
        "slug": "claude-haiku-4-5",
        "canonicalModelKey": "claude-haiku-4-5",
        "model": "Claude Haiku 4.5",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 56,
        "rankingEligible": true,
        "overallRank": 60,
        "url": "https://benchlm.ai/models/claude-haiku-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/claude-haiku-4-5.md",
        "score": 55,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 45,
        "slug": "o4-mini-high",
        "canonicalModelKey": "o4-mini-high",
        "model": "o4-mini (high)",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 79,
        "url": "https://benchlm.ai/models/o4-mini-high",
        "markdownUrl": "https://benchlm.ai/md/models/o4-mini-high.md",
        "score": 54.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 46,
        "slug": "claude-4-1-opus-thinking",
        "canonicalModelKey": "claude-4-1-opus-thinking",
        "model": "Claude 4.1 Opus Thinking",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 80,
        "url": "https://benchlm.ai/models/claude-4-1-opus-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/claude-4-1-opus-thinking.md",
        "score": 52.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 47,
        "slug": "nemotron-3-super-100b",
        "canonicalModelKey": "nemotron-3-super-100b",
        "model": "Nemotron 3 Super 100B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 43,
        "rankingEligible": true,
        "overallRank": 78,
        "url": "https://benchlm.ai/models/nemotron-3-super-100b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-super-100b.md",
        "score": 52.5,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 48,
        "slug": "gemini-3-flash",
        "canonicalModelKey": "gemini-3-flash",
        "model": "Gemini 3 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 55,
        "rankingEligible": true,
        "overallRank": 63,
        "url": "https://benchlm.ai/models/gemini-3-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-flash.md",
        "score": 52.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 49,
        "slug": "gpt-4o",
        "canonicalModelKey": "gpt-4o",
        "model": "GPT-4o",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": true,
        "overallRank": 81,
        "url": "https://benchlm.ai/models/gpt-4o",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4o.md",
        "score": 52,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 50,
        "slug": "deepseek-v3-2-thinking",
        "canonicalModelKey": "deepseek-v3-2-thinking",
        "model": "DeepSeek V3.2 (Thinking)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 60,
        "rankingEligible": true,
        "overallRank": 53,
        "url": "https://benchlm.ai/models/deepseek-v3-2-thinking",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-2-thinking.md",
        "score": 51.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 51,
        "slug": "kimi-k2",
        "canonicalModelKey": "kimi-k2",
        "model": "Kimi K2",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 41,
        "rankingEligible": true,
        "overallRank": 82,
        "url": "https://benchlm.ai/models/kimi-k2",
        "markdownUrl": "https://benchlm.ai/md/models/kimi-k2.md",
        "score": 51.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 52,
        "slug": "claude-3-5-sonnet",
        "canonicalModelKey": "claude-3-5-sonnet",
        "model": "Claude 3.5 Sonnet",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 40,
        "rankingEligible": true,
        "overallRank": 84,
        "url": "https://benchlm.ai/models/claude-3-5-sonnet",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-5-sonnet.md",
        "score": 50.8,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 53,
        "slug": "mistral-large-2",
        "canonicalModelKey": "mistral-large-2",
        "model": "Mistral Large 2",
        "creator": "Mistral",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 38,
        "rankingEligible": true,
        "overallRank": 87,
        "url": "https://benchlm.ai/models/mistral-large-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-large-2.md",
        "score": 47.8,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 54,
        "slug": "mistral-8x7b",
        "canonicalModelKey": "mistral-8x7b",
        "model": "Mistral 8x7B",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 110,
        "url": "https://benchlm.ai/models/mistral-8x7b",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b.md",
        "score": 46.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 55,
        "slug": "grok-code-fast-1",
        "canonicalModelKey": "grok-code-fast-1",
        "model": "Grok Code Fast 1",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "256K",
        "contextWindowTokens": 256000,
        "displayScore": 39,
        "rankingEligible": true,
        "overallRank": 85,
        "url": "https://benchlm.ai/models/grok-code-fast-1",
        "markdownUrl": "https://benchlm.ai/md/models/grok-code-fast-1.md",
        "score": 45.6,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 56,
        "slug": "gemini-1-5-pro",
        "canonicalModelKey": "gemini-1-5-pro",
        "model": "Gemini 1.5 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "2M",
        "contextWindowTokens": 2000000,
        "displayScore": 35,
        "rankingEligible": true,
        "overallRank": 89,
        "url": "https://benchlm.ai/models/gemini-1-5-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-5-pro.md",
        "score": 45.6,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 57,
        "slug": "gemini-3-1-flash-lite",
        "canonicalModelKey": "gemini-3-1-flash-lite",
        "model": "Gemini 3.1 Flash-Lite",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 47,
        "rankingEligible": true,
        "overallRank": 75,
        "url": "https://benchlm.ai/models/gemini-3-1-flash-lite",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-3-1-flash-lite.md",
        "score": 43.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 2,
        "scoreConfidence": 1
      },
      {
        "rank": 58,
        "slug": "gemini-1-0-pro",
        "canonicalModelKey": "gemini-1-0-pro",
        "model": "Gemini 1.0 Pro",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 108,
        "url": "https://benchlm.ai/models/gemini-1-0-pro",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-1-0-pro.md",
        "score": 42.6,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 59,
        "slug": "claude-3-opus",
        "canonicalModelKey": "claude-3-opus",
        "model": "Claude 3 Opus",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 92,
        "url": "https://benchlm.ai/models/claude-3-opus",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-opus.md",
        "score": 42,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 60,
        "slug": "qwen3-235b-2507-reasoning",
        "canonicalModelKey": "qwen3-235b-2507-reasoning",
        "model": "Qwen3 235B 2507 (Reasoning)",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 45,
        "rankingEligible": true,
        "overallRank": 76,
        "url": "https://benchlm.ai/models/qwen3-235b-2507-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507-reasoning.md",
        "score": 40.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 61,
        "slug": "gpt-4-turbo",
        "canonicalModelKey": "gpt-4-turbo",
        "model": "GPT-4 Turbo",
        "creator": "OpenAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 107,
        "url": "https://benchlm.ai/models/gpt-4-turbo",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-4-turbo.md",
        "score": 39.6,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 62,
        "slug": "llama-3-70b",
        "canonicalModelKey": "llama-3-70b",
        "model": "Llama 3 70B",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 105,
        "url": "https://benchlm.ai/models/llama-3-70b",
        "markdownUrl": "https://benchlm.ai/md/models/llama-3-70b.md",
        "score": 37.2,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 63,
        "slug": "nemotron-3-nano-30b",
        "canonicalModelKey": "nemotron-3-nano-30b",
        "model": "Nemotron 3 Nano 30B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 104,
        "url": "https://benchlm.ai/models/nemotron-3-nano-30b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-3-nano-30b.md",
        "score": 37.1,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 64,
        "slug": "minicpm5-1b",
        "canonicalModelKey": "minicpm5-1b",
        "model": "MiniCPM5-1B",
        "creator": "OpenBMB",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "131K",
        "contextWindowTokens": 131000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 93,
        "url": "https://benchlm.ai/models/minicpm5-1b",
        "markdownUrl": "https://benchlm.ai/md/models/minicpm5-1b.md",
        "score": 37,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 14,
        "scoreConfidence": 1
      },
      {
        "rank": 65,
        "slug": "lfm2-5-8b-a1b",
        "canonicalModelKey": "lfm2-5-8b-a1b",
        "model": "LFM2.5-8B-A1B",
        "creator": "LiquidAI",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 42,
        "rankingEligible": false,
        "overallRank": null,
        "url": "https://benchlm.ai/models/lfm2-5-8b-a1b",
        "markdownUrl": "https://benchlm.ai/md/models/lfm2-5-8b-a1b.md",
        "score": 36.5,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 6,
        "scoreConfidence": 1
      },
      {
        "rank": 66,
        "slug": "claude-3-haiku",
        "canonicalModelKey": "claude-3-haiku",
        "model": "Claude 3 Haiku",
        "creator": "Anthropic",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "200K",
        "contextWindowTokens": 200000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 111,
        "url": "https://benchlm.ai/models/claude-3-haiku",
        "markdownUrl": "https://benchlm.ai/md/models/claude-3-haiku.md",
        "score": 34.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 67,
        "slug": "deepseek-r1",
        "canonicalModelKey": "deepseek-r1",
        "model": "DeepSeek-R1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 94,
        "url": "https://benchlm.ai/models/deepseek-r1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-r1.md",
        "score": 33.6,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 68,
        "slug": "nemotron-4-15b",
        "canonicalModelKey": "nemotron-4-15b",
        "model": "Nemotron-4 15B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 113,
        "url": "https://benchlm.ai/models/nemotron-4-15b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-4-15b.md",
        "score": 32.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 69,
        "slug": "moonshot-v1",
        "canonicalModelKey": "moonshot-v1",
        "model": "Moonshot v1",
        "creator": "Moonshot AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 114,
        "url": "https://benchlm.ai/models/moonshot-v1",
        "markdownUrl": "https://benchlm.ai/md/models/moonshot-v1.md",
        "score": 31.6,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 70,
        "slug": "z-1",
        "canonicalModelKey": "z-1",
        "model": "Z-1",
        "creator": "Z",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 23,
        "rankingEligible": true,
        "overallRank": 109,
        "url": "https://benchlm.ai/models/z-1",
        "markdownUrl": "https://benchlm.ai/md/models/z-1.md",
        "score": 31,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 71,
        "slug": "nemotron-ultra-253b",
        "canonicalModelKey": "nemotron-ultra-253b",
        "model": "Nemotron Ultra 253B",
        "creator": "NVIDIA",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 22,
        "rankingEligible": true,
        "overallRank": 115,
        "url": "https://benchlm.ai/models/nemotron-ultra-253b",
        "markdownUrl": "https://benchlm.ai/md/models/nemotron-ultra-253b.md",
        "score": 27.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 72,
        "slug": "gemini-2-5-flash",
        "canonicalModelKey": "gemini-2-5-flash",
        "model": "Gemini 2.5 Flash",
        "creator": "Google",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 37,
        "rankingEligible": true,
        "overallRank": 88,
        "url": "https://benchlm.ai/models/gemini-2-5-flash",
        "markdownUrl": "https://benchlm.ai/md/models/gemini-2-5-flash.md",
        "score": 27.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 73,
        "slug": "gpt-oss-120b",
        "canonicalModelKey": "gpt-oss-120b",
        "model": "GPT-OSS 120B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 34,
        "rankingEligible": true,
        "overallRank": 91,
        "url": "https://benchlm.ai/models/gpt-oss-120b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-120b.md",
        "score": 26.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 74,
        "slug": "qwen3-235b-2507",
        "canonicalModelKey": "qwen3-235b-2507",
        "model": "Qwen3 235B 2507",
        "creator": "Alibaba",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 32,
        "rankingEligible": true,
        "overallRank": 95,
        "url": "https://benchlm.ai/models/qwen3-235b-2507",
        "markdownUrl": "https://benchlm.ai/md/models/qwen3-235b-2507.md",
        "score": 24.6,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 4,
        "scoreConfidence": 1
      },
      {
        "rank": 75,
        "slug": "llama-4-behemoth",
        "canonicalModelKey": "llama-4-behemoth",
        "model": "Llama 4 Behemoth",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 11,
        "rankingEligible": true,
        "overallRank": 120,
        "url": "https://benchlm.ai/models/llama-4-behemoth",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-behemoth.md",
        "score": 18.8,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 76,
        "slug": "llama-4-scout",
        "canonicalModelKey": "llama-4-scout",
        "model": "Llama 4 Scout",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "10M",
        "contextWindowTokens": 10000000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 103,
        "url": "https://benchlm.ai/models/llama-4-scout",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-scout.md",
        "score": 15.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 77,
        "slug": "gemma-3-27b",
        "canonicalModelKey": "gemma-3-27b",
        "model": "Gemma 3 27B",
        "creator": "Google",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 118,
        "url": "https://benchlm.ai/models/gemma-3-27b",
        "markdownUrl": "https://benchlm.ai/md/models/gemma-3-27b.md",
        "score": 13.4,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 78,
        "slug": "grok-3-beta",
        "canonicalModelKey": "grok-3-beta",
        "model": "Grok 3 [Beta]",
        "creator": "xAI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 30,
        "rankingEligible": true,
        "overallRank": 97,
        "url": "https://benchlm.ai/models/grok-3-beta",
        "markdownUrl": "https://benchlm.ai/md/models/grok-3-beta.md",
        "score": 10.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 79,
        "slug": "llama-4-maverick",
        "canonicalModelKey": "llama-4-maverick",
        "model": "Llama 4 Maverick",
        "creator": "Meta",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "1M",
        "contextWindowTokens": 1000000,
        "displayScore": 17,
        "rankingEligible": true,
        "overallRank": 117,
        "url": "https://benchlm.ai/models/llama-4-maverick",
        "markdownUrl": "https://benchlm.ai/md/models/llama-4-maverick.md",
        "score": 10.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 80,
        "slug": "nova-pro",
        "canonicalModelKey": "nova-pro",
        "model": "Nova Pro",
        "creator": "Amazon",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 10,
        "rankingEligible": true,
        "overallRank": 121,
        "url": "https://benchlm.ai/models/nova-pro",
        "markdownUrl": "https://benchlm.ai/md/models/nova-pro.md",
        "score": 9.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 81,
        "slug": "glm-4-5",
        "canonicalModelKey": "glm-4-5",
        "model": "GLM-4.5",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 25,
        "rankingEligible": true,
        "overallRank": 102,
        "url": "https://benchlm.ai/models/glm-4-5",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5.md",
        "score": 7.5,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 82,
        "slug": "deepseek-v3-1-reasoning",
        "canonicalModelKey": "deepseek-v3-1-reasoning",
        "model": "DeepSeek V3.1 (Reasoning)",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 29,
        "rankingEligible": true,
        "overallRank": 98,
        "url": "https://benchlm.ai/models/deepseek-v3-1-reasoning",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1-reasoning.md",
        "score": 7.3,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 83,
        "slug": "deepseek-v3-1",
        "canonicalModelKey": "deepseek-v3-1",
        "model": "DeepSeek V3.1",
        "creator": "DeepSeek",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 24,
        "rankingEligible": true,
        "overallRank": 106,
        "url": "https://benchlm.ai/models/deepseek-v3-1",
        "markdownUrl": "https://benchlm.ai/md/models/deepseek-v3-1.md",
        "score": 5.5,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 84,
        "slug": "glm-4-5-air",
        "canonicalModelKey": "glm-4-5-air",
        "model": "GLM-4.5-Air",
        "creator": "Z.AI",
        "sourceType": "Proprietary",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 18,
        "rankingEligible": true,
        "overallRank": 116,
        "url": "https://benchlm.ai/models/glm-4-5-air",
        "markdownUrl": "https://benchlm.ai/md/models/glm-4-5-air.md",
        "score": 3.6,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 85,
        "slug": "gpt-oss-20b",
        "canonicalModelKey": "gpt-oss-20b",
        "model": "GPT-OSS 20B",
        "creator": "OpenAI",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "128K",
        "contextWindowTokens": 128000,
        "displayScore": 16,
        "rankingEligible": true,
        "overallRank": 119,
        "url": "https://benchlm.ai/models/gpt-oss-20b",
        "markdownUrl": "https://benchlm.ai/md/models/gpt-oss-20b.md",
        "score": 1.9,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 1,
        "scoreConfidence": 1
      },
      {
        "rank": 86,
        "slug": "mistral-7b-v0-3",
        "canonicalModelKey": "mistral-7b-v0-3",
        "model": "Mistral 7B v0.3",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 4,
        "rankingEligible": true,
        "overallRank": 122,
        "url": "https://benchlm.ai/models/mistral-7b-v0-3",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-7b-v0-3.md",
        "score": 1.7,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      },
      {
        "rank": 87,
        "slug": "mistral-8x7b-v0-2",
        "canonicalModelKey": "mistral-8x7b-v0-2",
        "model": "Mistral 8x7B v0.2",
        "creator": "Mistral",
        "sourceType": "Open Weight",
        "reasoningType": "Non-Reasoning",
        "contextWindow": "32K",
        "contextWindowTokens": 32000,
        "displayScore": 1,
        "rankingEligible": true,
        "overallRank": 123,
        "url": "https://benchlm.ai/models/mistral-8x7b-v0-2",
        "markdownUrl": "https://benchlm.ai/md/models/mistral-8x7b-v0-2.md",
        "score": 1.1,
        "category": "math",
        "categoryLabel": "Mathematics",
        "trustedBenchmarkCount": 0,
        "scoreConfidence": 1
      }
    ]
  }
}
