{
  "inputs": {
    "mode": "current",
    "platform": "mac",
    "pcVram": "16",
    "macMemory": "64",
    "systemRam": "32",
    "budget": "1500to3000",
    "useCase": "coding",
    "priority": "speed",
    "gpuFamily": "nvidia",
    "context": "16384"
  },
  "result": {
    "verdict": "Comfortable",
    "tier": 4.75,
    "band": "high",
    "title": "Comfortable for midsize local models",
    "summary": "Strong for daily local use, coding, and experimentation.",
    "picks": [
      {
        "name": "Qwen3-Coder-30B-A3B (MoE, fits 24GB)",
        "why": "3B-active MoE — benchmark champion for local coding at this tier."
      },
      {
        "name": "Qwen 3.5 35B-A3B (generalist MoE)",
        "why": "Often wins real mixed-codebase work over the Coder variant; Apache 2.0."
      },
      {
        "name": "North Mini Code (30B-A3B, Apache 2.0)",
        "why": "Apache-2.0 30B-A3B from Cohere; ~17 GB at Q4 fits a 24 GB card. New alternative to Qwen3-Coder-30B-A3B with a larger 256K context — verify vendor benchmarks on your repo."
      },
      {
        "name": "gpt-oss-20b",
        "why": "OpenAI Apache 2.0; 21B MoE with 3.6B active; near o4-mini on reasoning; fits 16GB."
      }
    ],
    "runner": {
      "name": "Ollama or LM Studio",
      "note": "Both use MLX on Apple Silicon (Ollama added an MLX backend in early 2026); LM Studio still has the smoother UI."
    },
    "quantization": "Q4_K_M at 14B, or MLX-quantized weights for the best Apple Silicon throughput. MoE 30B-A3B still runs at 3B-dense speed.",
    "expected_speed": "30–50 tok/s on 8B (MLX), 15–25 on 14B Q4, 25–45 on 30B-A3B MoE. Long prompts: slow prefill.",
    "workflow": [
      "Wire into your editor — Continue.dev, Cline, or Aider.",
      "Keep a small fast model for autocomplete, a bigger one for review.",
      "Cloud fallback for the heaviest reasoning tasks.",
      "Take the faster path per task — local or cloud."
    ],
    "watchouts": [
      "Mac prefill is slow on long prompts — first token on a 32K prompt can take 60–90s",
      "Very large models still push this setup — quantize aggressively"
    ],
    "note": "Local makes sense for regular coding, chat, and experimentation. For the biggest models or very long context, cloud is the smoother choice."
  },
  "meta": {
    "version": "v1",
    "dated": "June 2026",
    "source": "https://theaibench.ai/",
    "docs": "https://theaibench.ai/api/",
    "license": "Free to cite with attribution to The AI Bench."
  }
}